From 6251259936ab6d5a42ceb5d45955f098ad81e463 Mon Sep 17 00:00:00 2001 From: EmirVildanov <reddog201030@gmail.com> Date: Tue, 18 Mar 2025 12:42:22 +0300 Subject: [PATCH 1/4] feat: make Tarantool errors more informative --- sbroad/sbroad-core/src/errors.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sbroad/sbroad-core/src/errors.rs b/sbroad/sbroad-core/src/errors.rs index 88f2267776..6cebc23819 100644 --- a/sbroad/sbroad-core/src/errors.rs +++ b/sbroad/sbroad-core/src/errors.rs @@ -401,20 +401,18 @@ impl std::error::Error for SbroadError {} impl<E: fmt::Debug> From<TransactionError<E>> for SbroadError { fn from(error: TransactionError<E>) -> Self { - SbroadError::FailedTo( - Action::Create, - Some(Entity::Transaction), - format_smolstr!("{error:?}"), + SbroadError::Invalid( + Entity::Transaction, + Some(format_smolstr!("Transaction error occurred: {error:?}")), ) } } impl From<Error> for SbroadError { fn from(error: Error) -> Self { - SbroadError::FailedTo( - Action::Create, - Some(Entity::Tarantool), - format_smolstr!("{error:?}"), + SbroadError::Invalid( + Entity::Tarantool, + Some(format_smolstr!("Tarantool module error occurred: {error:?}"),) ) } } -- GitLab From 05140098dd17f2fc0c1938664020914a78c2f771 Mon Sep 17 00:00:00 2001 From: EmirVildanov <reddog201030@gmail.com> Date: Tue, 25 Feb 2025 18:29:32 +0300 Subject: [PATCH 2/4] feat: refactor two stage aggregation logic, add comments and examples --- .../test/integration/groupby_test.lua | 2 +- sbroad/sbroad-core/src/errors.rs | 4 +- sbroad/sbroad-core/src/executor/tests.rs | 4 +- .../src/executor/tests/exec_plan.rs | 38 +- sbroad/sbroad-core/src/frontend/sql.rs | 2 +- sbroad/sbroad-core/src/frontend/sql/ir.rs | 1 - .../sbroad-core/src/frontend/sql/ir/tests.rs | 384 ++-- .../src/frontend/sql/ir/tests/global.rs | 46 +- .../src/frontend/sql/ir/tests/like.rs | 16 +- .../src/frontend/sql/ir/tests/limit.rs | 12 +- sbroad/sbroad-core/src/ir/aggregates.rs | 629 ++++--- 
.../sbroad-core/src/ir/distribution/tests.rs | 4 +- .../src/ir/explain/tests/query_explain.rs | 12 +- sbroad/sbroad-core/src/ir/function.rs | 12 +- sbroad/sbroad-core/src/ir/helpers.rs | 6 +- sbroad/sbroad-core/src/ir/helpers/tests.rs | 10 +- sbroad/sbroad-core/src/ir/node.rs | 7 +- sbroad/sbroad-core/src/ir/node/expression.rs | 2 +- .../transformation/redistribution/groupby.rs | 1631 +++++++---------- .../ir/transformation/redistribution/tests.rs | 4 +- sbroad/sbroad-core/src/utils.rs | 12 +- 21 files changed, 1348 insertions(+), 1490 deletions(-) diff --git a/sbroad/sbroad-cartridge/test_app/test/integration/groupby_test.lua b/sbroad/sbroad-cartridge/test_app/test/integration/groupby_test.lua index 3891dd00ad..75c4297bd5 100644 --- a/sbroad/sbroad-cartridge/test_app/test/integration/groupby_test.lua +++ b/sbroad/sbroad-cartridge/test_app/test/integration/groupby_test.lua @@ -881,7 +881,7 @@ groupby_queries.test_aggr_invalid = function() local _, err = api:call("sbroad.execute", { [[ SELECT "d", count(sum("e")) from "arithmetic_space" group by "d"]], {} }) - t.assert_str_contains(tostring(err), "aggregate function inside aggregate function") + t.assert_str_contains(tostring(err), "aggregate functions inside aggregate function") end groupby_queries.test_groupby_arith_expression = function() diff --git a/sbroad/sbroad-core/src/errors.rs b/sbroad/sbroad-core/src/errors.rs index 6cebc23819..e5a9b51e64 100644 --- a/sbroad/sbroad-core/src/errors.rs +++ b/sbroad/sbroad-core/src/errors.rs @@ -412,7 +412,9 @@ impl From<Error> for SbroadError { fn from(error: Error) -> Self { SbroadError::Invalid( Entity::Tarantool, - Some(format_smolstr!("Tarantool module error occurred: {error:?}"),) + Some(format_smolstr!( + "Tarantool module error occurred: {error:?}" + )), ) } } diff --git a/sbroad/sbroad-core/src/executor/tests.rs b/sbroad/sbroad-core/src/executor/tests.rs index 683588507b..3151218c58 100644 --- a/sbroad/sbroad-core/src/executor/tests.rs +++ 
b/sbroad/sbroad-core/src/executor/tests.rs @@ -894,9 +894,9 @@ fn groupby_linker_test() { LuaValue::String(String::from(PatternWithParams::new( format!( "{} {} {}", - r#"SELECT "column_596" as "ii" FROM"#, + r#"SELECT "gr_expr_1" as "ii" FROM"#, r#"(SELECT "COL_1" FROM "TMP_test_0136")"#, - r#"GROUP BY "column_596""#, + r#"GROUP BY "gr_expr_1""#, ), vec![], ))), diff --git a/sbroad/sbroad-core/src/executor/tests/exec_plan.rs b/sbroad/sbroad-core/src/executor/tests/exec_plan.rs index c96a5efe56..18f7bd0103 100644 --- a/sbroad/sbroad-core/src/executor/tests/exec_plan.rs +++ b/sbroad/sbroad-core/src/executor/tests/exec_plan.rs @@ -112,7 +112,7 @@ fn exec_plan_subtree_two_stage_groupby_test() { assert_eq!( sql, PatternWithParams::new( - r#"SELECT "T1"."FIRST_NAME" as "column_596" FROM "test_space" as "T1" GROUP BY "T1"."FIRST_NAME""# + r#"SELECT "T1"."FIRST_NAME" as "gr_expr_1" FROM "test_space" as "T1" GROUP BY "T1"."FIRST_NAME""# .to_string(), vec![] ) @@ -159,9 +159,9 @@ fn exec_plan_subtree_two_stage_groupby_test_2() { sql, PatternWithParams::new( f_sql( - r#"SELECT "T1"."FIRST_NAME" as "column_596", -"T1"."sys_op" as "column_696", -"T1"."sysFrom" as "column_796" + r#"SELECT "T1"."FIRST_NAME" as "gr_expr_1", +"T1"."sys_op" as "gr_expr_2", +"T1"."sysFrom" as "gr_expr_3" FROM "test_space" as "T1" GROUP BY "T1"."FIRST_NAME", "T1"."sys_op", "T1"."sysFrom""# ), @@ -227,13 +227,13 @@ fn exec_plan_subtree_aggregates() { sql, PatternWithParams::new( f_sql( - r#"SELECT "T1"."sys_op" as "column_596", -("T1"."id") * ("T1"."sys_op") as "column_1632", -"T1"."id" as "column_2096", count ("T1"."sysFrom") as "count_1596", -sum ("T1"."id") as "sum_1796", count ("T1"."id") as "count_2696", -min ("T1"."id") as "min_3096", group_concat ("T1"."FIRST_NAME", ?) 
as "group_concat_2496", -total ("T1"."id") as "total_2896", -max ("T1"."id") as "max_3296" FROM "test_space" as "T1" GROUP BY "T1"."sys_op", + r#"SELECT "T1"."sys_op" as "gr_expr_1", +("T1"."id") * ("T1"."sys_op") as "gr_expr_2", +"T1"."id" as "gr_expr_3", count ("T1"."sysFrom") as "count_1", +sum ("T1"."id") as "sum_2", count ("T1"."id") as "avg_4", +min ("T1"."id") as "min_6", group_concat ("T1"."FIRST_NAME", ?) as "group_concat_3", +total ("T1"."id") as "total_5", +max ("T1"."id") as "max_7" FROM "test_space" as "T1" GROUP BY "T1"."sys_op", ("T1"."id") * ("T1"."sys_op"), "T1"."id""# ), vec![Value::from("o")] @@ -288,7 +288,7 @@ fn exec_plan_subtree_aggregates_no_groupby() { assert_eq!( sql, PatternWithParams::new( - r#"SELECT ("T1"."id") + ("T1"."sysFrom") as "column_632", count ("T1"."sysFrom") as "count_696" FROM "test_space" as "T1" GROUP BY ("T1"."id") + ("T1"."sysFrom")"#.to_string(), + r#"SELECT ("T1"."id") + ("T1"."sysFrom") as "gr_expr_1", count ("T1"."sysFrom") as "count_1" FROM "test_space" as "T1" GROUP BY ("T1"."id") + ("T1"."sysFrom")"#.to_string(), vec![] )); @@ -447,7 +447,7 @@ fn exec_plan_subtree_count_asterisk() { assert_eq!( sql, PatternWithParams::new( - r#"SELECT count (*) as "count_596" FROM "test_space""#.to_string(), + r#"SELECT count (*) as "count_1" FROM "test_space""#.to_string(), vec![] ) ); @@ -501,8 +501,8 @@ fn exec_plan_subtree_having() { PatternWithParams::new( format!( "{} {} {}", - r#"SELECT "T1"."sys_op" as "column_596", ("T1"."sys_op") * (?) as "column_2032","#, - r#"count (("T1"."sys_op") * (?)) as "count_2296" FROM "test_space" as "T1""#, + r#"SELECT "T1"."sys_op" as "gr_expr_1", ("T1"."sys_op") * (?) 
as "gr_expr_2","#, + r#"count (("T1"."sys_op") * (?)) as "count_1" FROM "test_space" as "T1""#, r#"GROUP BY "T1"."sys_op", ("T1"."sys_op") * (?)"#, ), vec![Value::Unsigned(2), Value::Unsigned(2), Value::Unsigned(2)] @@ -564,8 +564,8 @@ fn exec_plan_subtree_having_without_groupby() { PatternWithParams::new( format!( "{} {} {}", - r#"SELECT ("T1"."sys_op") * (?) as "column_1332","#, - r#"count (("T1"."sys_op") * (?)) as "count_1496" FROM "test_space" as "T1""#, + r#"SELECT ("T1"."sys_op") * (?) as "gr_expr_1","#, + r#"count (("T1"."sys_op") * (?)) as "count_1" FROM "test_space" as "T1""#, r#"GROUP BY ("T1"."sys_op") * (?)"#, ), vec![Value::Unsigned(2), Value::Unsigned(2), Value::Unsigned(2)] @@ -727,7 +727,7 @@ fn exec_plan_subquery_as_expression_under_group_by() { assert_eq!( sql, PatternWithParams::new( - r#"SELECT ("test_space"."id") + (VALUES (?)) as "column_932", count (*) as "count_1496" FROM "test_space" GROUP BY ("test_space"."id") + (VALUES (?))"#.to_string(), + r#"SELECT ("test_space"."id") + (VALUES (?)) as "gr_expr_1", count (*) as "count_1" FROM "test_space" GROUP BY ("test_space"."id") + (VALUES (?))"#.to_string(), vec![Value::Unsigned(1u64), Value::Unsigned(1u64)] ) ); @@ -737,7 +737,7 @@ fn exec_plan_subquery_as_expression_under_group_by() { assert_eq!( sql, PatternWithParams::new( - r#"SELECT sum ("count_1496") as "col_1" FROM (SELECT "COL_1" FROM "TMP_test_0136") GROUP BY "COL_1""#.to_string(), + r#"SELECT sum ("count_1") as "col_1" FROM (SELECT "COL_1" FROM "TMP_test_0136") GROUP BY "COL_1""#.to_string(), vec![] ) ); diff --git a/sbroad/sbroad-core/src/frontend/sql.rs b/sbroad/sbroad-core/src/frontend/sql.rs index 630731427e..8d2ebe8deb 100644 --- a/sbroad/sbroad-core/src/frontend/sql.rs +++ b/sbroad/sbroad-core/src/frontend/sql.rs @@ -2538,7 +2538,7 @@ impl ParseExpression { let arg_plan_id = arg.populate_plan(plan, worker)?; plan_arg_ids.push(arg_plan_id); } - if let Some(kind) = AggregateKind::new(name) { + if let Some(kind) = 
AggregateKind::from_name(name) { plan.add_aggregate_function(name, kind, plan_arg_ids, is_distinct)? } else if is_distinct { return Err(SbroadError::Invalid( diff --git a/sbroad/sbroad-core/src/frontend/sql/ir.rs b/sbroad/sbroad-core/src/frontend/sql/ir.rs index 31e2a5e04b..b96c5363a0 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir.rs @@ -452,7 +452,6 @@ impl SubtreeCloner { children: _, gr_exprs, output: _, - is_final: _, }) => { *gr_exprs = self.copy_list(gr_exprs)?; } diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs index fe52923a52..71be50a2c3 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs @@ -845,9 +845,9 @@ fn front_order_by_over_single_distribution_must_not_add_motion() { scan projection ("id_count"::unsigned -> "id_count") scan - projection (sum(("count_696"::unsigned))::unsigned -> "id_count") + projection (sum(("count_1"::unsigned))::unsigned -> "id_count") motion [policy: full] - projection (count(("test_space"."id"::unsigned))::unsigned -> "count_696") + projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 @@ -1298,10 +1298,10 @@ fn front_sql_groupby() { let plan = sql_to_optimized_ir(input, vec![]); println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::integer -> "identification_number", "column_696"::string -> "product_code") - group by ("column_596"::integer, "column_696"::string) output: ("column_596"::integer -> "column_596", "column_696"::string -> "column_696") - motion [policy: segment([ref("column_596"), ref("column_696")])] - projection ("hash_testing"."identification_number"::integer -> "column_596", "hash_testing"."product_code"::string -> "column_696") + projection ("gr_expr_1"::integer -> 
"identification_number", "gr_expr_2"::string -> "product_code") + group by ("gr_expr_1"::integer, "gr_expr_2"::string) output: ("gr_expr_1"::integer -> "gr_expr_1", "gr_expr_2"::string -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("hash_testing"."identification_number"::integer -> "gr_expr_1", "hash_testing"."product_code"::string -> "gr_expr_2") group by ("hash_testing"."identification_number"::integer, "hash_testing"."product_code"::string) output: ("hash_testing"."identification_number"::integer -> "identification_number", "hash_testing"."product_code"::string -> "product_code", "hash_testing"."product_units"::boolean -> "product_units", "hash_testing"."sys_op"::unsigned -> "sys_op", "hash_testing"."bucket_id"::unsigned -> "bucket_id") scan "hash_testing" execution options: @@ -1321,10 +1321,10 @@ fn front_sql_groupby_less_cols_in_proj() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::integer -> "identification_number") - group by ("column_596"::integer, "column_696"::boolean) output: ("column_596"::integer -> "column_596", "column_696"::boolean -> "column_696") - motion [policy: segment([ref("column_596"), ref("column_696")])] - projection ("hash_testing"."identification_number"::integer -> "column_596", "hash_testing"."product_units"::boolean -> "column_696") + projection ("gr_expr_1"::integer -> "identification_number") + group by ("gr_expr_1"::integer, "gr_expr_2"::boolean) output: ("gr_expr_1"::integer -> "gr_expr_1", "gr_expr_2"::boolean -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("hash_testing"."identification_number"::integer -> "gr_expr_1", "hash_testing"."product_units"::boolean -> "gr_expr_2") group by ("hash_testing"."identification_number"::integer, "hash_testing"."product_units"::boolean) output: ("hash_testing"."identification_number"::integer -> 
"identification_number", "hash_testing"."product_code"::string -> "product_code", "hash_testing"."product_units"::boolean -> "product_units", "hash_testing"."sys_op"::unsigned -> "sys_op", "hash_testing"."bucket_id"::unsigned -> "bucket_id") scan "hash_testing" execution options: @@ -1344,10 +1344,10 @@ fn front_sql_groupby_union_1() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all - projection ("column_596"::integer -> "identification_number") - group by ("column_596"::integer) output: ("column_596"::integer -> "column_596") - motion [policy: segment([ref("column_596")])] - projection ("hash_testing"."identification_number"::integer -> "column_596") + projection ("gr_expr_1"::integer -> "identification_number") + group by ("gr_expr_1"::integer) output: ("gr_expr_1"::integer -> "gr_expr_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("hash_testing"."identification_number"::integer -> "gr_expr_1") group by ("hash_testing"."identification_number"::integer) output: ("hash_testing"."identification_number"::integer -> "identification_number", "hash_testing"."product_code"::string -> "product_code", "hash_testing"."product_units"::boolean -> "product_units", "hash_testing"."sys_op"::unsigned -> "sys_op", "hash_testing"."bucket_id"::unsigned -> "bucket_id") scan "hash_testing" projection ("hash_testing"."identification_number"::integer -> "identification_number") @@ -1376,10 +1376,10 @@ fn front_sql_groupby_union_2() { projection ("identification_number"::integer -> "identification_number") scan union all - projection ("column_1196"::integer -> "identification_number") - group by ("column_1196"::integer) output: ("column_1196"::integer -> "column_1196") - motion [policy: segment([ref("column_1196")])] - projection ("hash_testing"."identification_number"::integer -> "column_1196") + projection ("gr_expr_1"::integer -> "identification_number") + group by ("gr_expr_1"::integer) output: ("gr_expr_1"::integer -> "gr_expr_1") + motion 
[policy: segment([ref("gr_expr_1")])] + projection ("hash_testing"."identification_number"::integer -> "gr_expr_1") group by ("hash_testing"."identification_number"::integer) output: ("hash_testing"."identification_number"::integer -> "identification_number", "hash_testing"."product_code"::string -> "product_code", "hash_testing"."product_units"::boolean -> "product_units", "hash_testing"."sys_op"::unsigned -> "sys_op", "hash_testing"."bucket_id"::unsigned -> "bucket_id") scan "hash_testing" projection ("hash_testing"."identification_number"::integer -> "identification_number") @@ -1402,10 +1402,10 @@ fn front_sql_groupby_join_1() { let plan = sql_to_optimized_ir(input, vec![]); println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_2496"::string -> "product_code", "column_2596"::boolean -> "product_units") - group by ("column_2496"::string, "column_2596"::boolean) output: ("column_2496"::string -> "column_2496", "column_2596"::boolean -> "column_2596") - motion [policy: segment([ref("column_2496"), ref("column_2596")])] - projection ("t2"."product_code"::string -> "column_2496", "t2"."product_units"::boolean -> "column_2596") + projection ("gr_expr_1"::string -> "product_code", "gr_expr_2"::boolean -> "product_units") + group by ("gr_expr_1"::string, "gr_expr_2"::boolean) output: ("gr_expr_1"::string -> "gr_expr_1", "gr_expr_2"::boolean -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("t2"."product_code"::string -> "gr_expr_1", "t2"."product_units"::boolean -> "gr_expr_2") group by ("t2"."product_code"::string, "t2"."product_units"::boolean) output: ("t2"."product_units"::boolean -> "product_units", "t2"."product_code"::string -> "product_code", "t2"."identification_number"::integer -> "identification_number", "t"."id"::unsigned -> "id") join on ROW("t2"."identification_number"::integer) = ROW("t"."id"::unsigned) scan "t2" @@ -1487,9 +1487,9 @@ fn 
front_sql_join() { scan "hash_single_testing" motion [policy: full] scan "t2" - projection (sum(("sum_1796"::decimal))::decimal -> "id") + projection (sum(("sum_1"::decimal))::decimal -> "id") motion [policy: full] - projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_1796") + projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 @@ -1507,10 +1507,10 @@ fn front_sql_groupby_insert() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" insert "t" on conflict: fail motion [policy: segment([value(NULL), ref("d")])] - projection ("column_596"::unsigned -> "b", "column_696"::unsigned -> "d") - group by ("column_596"::unsigned, "column_696"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_696"::unsigned -> "column_696") - motion [policy: segment([ref("column_596"), ref("column_696")])] - projection ("t"."b"::unsigned -> "column_596", "t"."d"::unsigned -> "column_696") + projection ("gr_expr_1"::unsigned -> "b", "gr_expr_2"::unsigned -> "d") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("t"."b"::unsigned -> "gr_expr_1", "t"."d"::unsigned -> "gr_expr_2") group by ("t"."b"::unsigned, "t"."d"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1553,10 +1553,10 @@ fn front_sql_aggregates() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "b", ROW(sum(("count_1496"::unsigned))::unsigned) + ROW(sum(("count_1596"::unsigned))::unsigned) -> "col_1") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "count_1596"::unsigned 
-> "count_1596", "count_1496"::unsigned -> "count_1496") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", count(("t"."b"::unsigned))::unsigned -> "count_1596", count(("t"."a"::unsigned))::unsigned -> "count_1496") + projection ("gr_expr_1"::unsigned -> "b", ROW(sum(("count_1"::unsigned))::unsigned) + ROW(sum(("count_2"::unsigned))::unsigned) -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_2"::unsigned -> "count_2", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", count(("t"."b"::unsigned))::unsigned -> "count_2", count(("t"."a"::unsigned))::unsigned -> "count_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1571,10 +1571,10 @@ fn front_sql_distinct_asterisk() { join (select "id" from "test_space") on true"#; let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_1996"::unsigned -> "id", "column_2096"::unsigned -> "id") - group by ("column_1996"::unsigned, "column_2096"::unsigned) output: ("column_1996"::unsigned -> "column_1996", "column_2096"::unsigned -> "column_2096") - motion [policy: segment([ref("column_1996"), ref("column_2096")])] - projection ("id"::unsigned -> "column_1996", "id"::unsigned -> "column_2096") + projection ("gr_expr_1"::unsigned -> "id", "gr_expr_2"::unsigned -> "id") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("id"::unsigned -> "gr_expr_1", "id"::unsigned -> "gr_expr_2") group by ("id"::unsigned, "id"::unsigned) output: ("id"::unsigned -> "id", 
"id"::unsigned -> "id") join on true::boolean scan @@ -1597,9 +1597,9 @@ fn front_sql_avg_aggregate() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_696"::decimal::double))::decimal / sum(("count_696"::decimal::double))::decimal -> "col_1", avg(distinct ("column_796"::decimal::double))::decimal -> "col_2", ROW(sum(("sum_696"::decimal::double))::decimal / sum(("count_696"::decimal::double))::decimal) * ROW(sum(("sum_696"::decimal::double))::decimal / sum(("count_696"::decimal::double))::decimal) -> "col_3") + projection (sum(("avg_1"::decimal::double))::decimal / sum(("avg_2"::decimal::double))::decimal -> "col_1", avg(distinct ("gr_expr_1"::decimal::double))::decimal -> "col_2", ROW(sum(("avg_1"::decimal::double))::decimal / sum(("avg_2"::decimal::double))::decimal) * ROW(sum(("avg_1"::decimal::double))::decimal / sum(("avg_2"::decimal::double))::decimal) -> "col_3") motion [policy: full] - projection ("t"."b"::unsigned -> "column_796", count(("t"."b"::unsigned))::unsigned -> "count_696", sum(("t"."b"::unsigned))::decimal -> "sum_696") + projection ("t"."b"::unsigned -> "gr_expr_1", count(("t"."b"::unsigned))::unsigned -> "avg_2", sum(("t"."b"::unsigned))::decimal -> "avg_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1616,9 +1616,9 @@ fn front_sql_total_aggregate() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (total(("total_696"::double))::double -> "col_1", total(distinct ("column_796"::double))::double -> "col_2") + projection (total(("total_1"::double))::double -> "col_1", total(distinct ("gr_expr_1"::double))::double -> "col_2") motion [policy: full] - projection ("t"."b"::unsigned -> "column_796", total(("t"."b"::unsigned))::double -> 
"total_696") + projection ("t"."b"::unsigned -> "gr_expr_1", total(("t"."b"::unsigned))::double -> "total_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1635,9 +1635,9 @@ fn front_sql_min_aggregate() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (min(("min_696"::unsigned))::unsigned -> "col_1", min(distinct ("column_796"::unsigned))::unsigned -> "col_2") + projection (min(("min_1"::unsigned))::unsigned -> "col_1", min(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection ("t"."b"::unsigned -> "column_796", min(("t"."b"::unsigned))::unsigned -> "min_696") + projection ("t"."b"::unsigned -> "gr_expr_1", min(("t"."b"::unsigned))::unsigned -> "min_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1654,9 +1654,9 @@ fn front_sql_max_aggregate() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (max(("max_696"::unsigned))::unsigned -> "col_1", max(distinct ("column_796"::unsigned))::unsigned -> "col_2") + projection (max(("max_1"::unsigned))::unsigned -> "col_1", max(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection ("t"."b"::unsigned -> "column_796", max(("t"."b"::unsigned))::unsigned -> "max_696") + projection ("t"."b"::unsigned -> "gr_expr_1", max(("t"."b"::unsigned))::unsigned -> "max_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1673,9 +1673,9 @@ fn 
front_sql_group_concat_aggregate() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (group_concat(("group_concat_696"::string))::string -> "col_1", group_concat(distinct ("column_796"::string))::string -> "col_2") + projection (group_concat(("group_concat_1"::string))::string -> "col_1", group_concat(distinct ("gr_expr_1"::string))::string -> "col_2") motion [policy: full] - projection ("test_space"."FIRST_NAME"::string -> "column_796", group_concat(("test_space"."FIRST_NAME"::string))::string -> "group_concat_696") + projection ("test_space"."FIRST_NAME"::string -> "gr_expr_1", group_concat(("test_space"."FIRST_NAME"::string))::string -> "group_concat_1") group by ("test_space"."FIRST_NAME"::string) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" execution options: @@ -1692,9 +1692,9 @@ fn front_sql_group_concat_aggregate2() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (group_concat(("group_concat_696"::string, ' '::string))::string -> "col_1", group_concat(distinct ("column_796"::string))::string -> "col_2") + projection (group_concat(("group_concat_1"::string, ' '::string))::string -> "col_1", group_concat(distinct ("gr_expr_1"::string))::string -> "col_2") motion [policy: full] - projection ("test_space"."FIRST_NAME"::string -> "column_796", group_concat(("test_space"."FIRST_NAME"::string, ' '::string))::string -> "group_concat_696") + projection ("test_space"."FIRST_NAME"::string -> "gr_expr_1", group_concat(("test_space"."FIRST_NAME"::string, ' '::string))::string -> "group_concat_1") group by ("test_space"."FIRST_NAME"::string) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> 
"sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" execution options: @@ -1709,9 +1709,9 @@ fn front_sql_string_agg_alias_to_group_concat() { let input = r#"SELECT string_agg("FIRST_NAME", ',') FROM "test_space""#; let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (group_concat(("group_concat_696"::string, ','::string))::string -> "col_1") + projection (group_concat(("group_concat_1"::string, ','::string))::string -> "col_1") motion [policy: full] - projection (group_concat(("test_space"."FIRST_NAME"::string, ','::string))::string -> "group_concat_696") + projection (group_concat(("test_space"."FIRST_NAME"::string, ','::string))::string -> "group_concat_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 @@ -1722,10 +1722,10 @@ fn front_sql_string_agg_alias_to_group_concat() { let input = r#"SELECT "id", string_agg("FIRST_NAME", ',') FROM "test_space" GROUP BY "id""#; let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "id", group_concat(("group_concat_1396"::string, ','::string))::string -> "col_1") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "group_concat_1396"::string -> "group_concat_1396") - motion [policy: segment([ref("column_596")])] - projection ("test_space"."id"::unsigned -> "column_596", group_concat(("test_space"."FIRST_NAME"::string, ','::string))::string -> "group_concat_1396") + projection ("gr_expr_1"::unsigned -> "id", group_concat(("group_concat_1"::string, ','::string))::string -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "group_concat_1"::string -> "group_concat_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("test_space"."id"::unsigned -> 
"gr_expr_1", group_concat(("test_space"."FIRST_NAME"::string, ','::string))::string -> "group_concat_1") group by ("test_space"."id"::unsigned) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" execution options: @@ -1742,9 +1742,9 @@ fn front_sql_count_asterisk1() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("count_596"::unsigned))::unsigned -> "col_1", sum(("count_596"::unsigned))::unsigned -> "col_2") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1", sum(("count_1"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (count((*::integer))::unsigned -> "count_596") + projection (count((*::integer))::unsigned -> "count_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -1760,10 +1760,10 @@ fn front_sql_count_asterisk2() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("count_1196"::unsigned))::unsigned -> "col_1", "column_596"::unsigned -> "b") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "count_1196"::unsigned -> "count_1196") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", count((*::integer))::unsigned -> "count_1196") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1", "gr_expr_1"::unsigned -> "b") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", count((*::integer))::unsigned -> "count_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", 
"t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1795,10 +1795,10 @@ fn front_sql_aggregates_with_subexpressions() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "b", sum(("count_1496"::unsigned))::unsigned -> "col_1", sum(("count_1796"::unsigned))::unsigned -> "col_2") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "count_1496"::unsigned -> "count_1496", "count_1796"::unsigned -> "count_1796") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", count((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned) + ROW(1::unsigned)))::unsigned -> "count_1496", count(("func"(("t"."a"::unsigned))::integer))::unsigned -> "count_1796") + projection ("gr_expr_1"::unsigned -> "b", sum(("count_1"::unsigned))::unsigned -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1", "count_2"::unsigned -> "count_2") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", count((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned) + ROW(1::unsigned)))::unsigned -> "count_1", count(("func"(("t"."a"::unsigned))::integer))::unsigned -> "count_2") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1816,10 +1816,10 @@ fn front_sql_aggregates_with_distinct1() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "b", count(distinct ("column_1296"::unsigned))::unsigned -> "col_1", count(distinct ("column_596"::unsigned))::unsigned -> "col_2") - group by 
("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_1296"::unsigned -> "column_1296") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", "t"."a"::unsigned -> "column_1296") + projection ("gr_expr_1"::unsigned -> "b", count(distinct ("gr_expr_2"::unsigned))::unsigned -> "col_1", count(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_2") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", "t"."a"::unsigned -> "gr_expr_2") group by ("t"."b"::unsigned, "t"."a"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1837,10 +1837,10 @@ fn front_sql_aggregates_with_distinct2() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "b", sum(distinct ("column_1232"::decimal))::decimal -> "col_1") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_1232"::unsigned -> "column_1232") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) + ROW(3::unsigned) -> "column_1232") + projection ("gr_expr_1"::unsigned -> "b", sum(distinct ("gr_expr_2"::decimal))::decimal -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) + ROW(3::unsigned) -> "gr_expr_2") group by ("t"."b"::unsigned, ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) + ROW(3::unsigned)) output: 
("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1857,9 +1857,9 @@ fn front_sql_aggregates_with_distinct3() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(distinct ("column_632"::decimal))::decimal -> "col_1") + projection (sum(distinct ("gr_expr_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) + ROW(3::unsigned) -> "column_632") + projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) + ROW(3::unsigned) -> "gr_expr_1") group by (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) + ROW(3::unsigned)) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1879,7 +1879,7 @@ fn front_sql_aggregate_inside_aggregate() { .unwrap_err(); assert_eq!( - "invalid query: aggregate function inside aggregate function is not allowed.", + "invalid query: aggregate functions inside aggregate function are not allowed.", err.to_string() ); } @@ -1975,11 +1975,11 @@ fn front_sql_pg_style_params3() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_1132"::unsigned -> "col_1") - having ROW(sum(("count_1896"::unsigned))::unsigned) > ROW(42::unsigned) - group by ("column_1132"::unsigned) output: ("column_1132"::unsigned -> "column_1132", "count_1896"::unsigned -> "count_1896") - motion [policy: segment([ref("column_1132")])] - projection (ROW("t"."a"::unsigned) + ROW(42::unsigned) -> "column_1132", count(("t"."b"::unsigned))::unsigned -> "count_1896") + projection ("gr_expr_1"::unsigned -> "col_1") + having ROW(sum(("count_1"::unsigned))::unsigned) > ROW(42::unsigned) + group by ("gr_expr_1"::unsigned) output: 
("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW("t"."a"::unsigned) + ROW(42::unsigned) -> "gr_expr_1", count(("t"."b"::unsigned))::unsigned -> "count_1") group by (ROW("t"."a"::unsigned) + ROW(42::unsigned)) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") selection ROW("t"."a"::unsigned) = ROW(42::unsigned) scan "t" @@ -2053,9 +2053,9 @@ fn front_sql_aggregate_without_groupby() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_796"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned) + ROW(1::unsigned)))::decimal -> "sum_796") + projection (sum((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned) + ROW(1::unsigned)))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2072,9 +2072,9 @@ fn front_sql_aggregate_without_groupby2() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("t1"."col_1"::unsigned -> "col_1") scan "t1" - projection (sum(("count_696"::unsigned))::unsigned -> "col_1") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1") motion [policy: full] - projection (count(("test_space"."id"::unsigned))::unsigned -> "count_696") + projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 @@ -2091,9 +2091,9 @@ fn front_sql_aggregate_on_aggregate() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (max(("t1"."c"::unsigned))::unsigned -> "col_1") scan "t1" - projection (sum(("count_696"::unsigned))::unsigned -> "c") + projection (sum(("count_1"::unsigned))::unsigned -> "c") motion [policy: full] - projection 
(count(("test_space"."id"::unsigned))::unsigned -> "count_696") + projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 @@ -2117,9 +2117,9 @@ fn front_sql_union_single_left() { projection ("t"."a"::unsigned -> "a") scan "t" motion [policy: segment([ref("col_1")])] - projection (sum(("sum_1296"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1296") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2141,9 +2141,9 @@ fn front_sql_union_single_right() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all motion [policy: segment([ref("col_1")])] - projection (sum(("sum_696"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_696") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" projection ("t"."a"::unsigned -> "a") scan "t" @@ -2167,14 +2167,14 @@ fn front_sql_union_single_both() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all motion [policy: segment([ref("col_1")])] - projection (sum(("sum_696"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_696") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" motion [policy: segment([ref("col_1")])] - projection (sum(("sum_1396"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1396") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2192,9 +2192,9 @@ fn 
front_sql_insert_single() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" insert "t" on conflict: fail motion [policy: segment([value(NULL), ref("col_2")])] - projection (sum(("sum_696"::decimal))::decimal -> "col_1", sum(("count_896"::unsigned))::unsigned -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (sum(("t"."b"::unsigned))::decimal -> "sum_696", count(("t"."d"::unsigned))::unsigned -> "count_896") + projection (sum(("t"."b"::unsigned))::decimal -> "sum_1", count(("t"."d"::unsigned))::unsigned -> "count_2") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2216,9 +2216,9 @@ fn front_sql_except_single_right() { projection ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b") scan "t" motion [policy: segment([ref("col_1"), ref("col_2")])] - projection (sum(("sum_1396"::decimal))::decimal -> "col_1", sum(("count_1596"::unsigned))::unsigned -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (count(("t"."b"::unsigned))::unsigned -> "count_1596", sum(("t"."a"::unsigned))::decimal -> "sum_1396") + projection (count(("t"."b"::unsigned))::unsigned -> "count_2", sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2238,9 +2238,9 @@ fn front_sql_except_single_right() { projection ("t"."b"::unsigned -> "b", "t"."a"::unsigned -> "a") scan "t" motion [policy: segment([ref("col_2"), ref("col_1")])] - projection (sum(("sum_1396"::decimal))::decimal -> "col_1", sum(("count_1596"::unsigned))::unsigned -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (count(("t"."b"::unsigned))::unsigned -> "count_1596", sum(("t"."a"::unsigned))::decimal -> "sum_1396") + projection 
(count(("t"."b"::unsigned))::unsigned -> "count_2", sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2260,9 +2260,9 @@ fn front_sql_except_single_left() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" except motion [policy: segment([ref("col_1"), ref("col_2")])] - projection (sum(("sum_696"::decimal))::decimal -> "col_1", sum(("count_896"::unsigned))::unsigned -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (count(("t"."b"::unsigned))::unsigned -> "count_896", sum(("t"."a"::unsigned))::decimal -> "sum_696") + projection (count(("t"."b"::unsigned))::unsigned -> "count_2", sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" projection ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b") scan "t" @@ -2283,14 +2283,14 @@ fn front_sql_except_single_both() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" except motion [policy: segment([ref("col_1")])] - projection (sum(("sum_696"::decimal))::decimal -> "col_1", sum(("count_896"::unsigned))::unsigned -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (count(("t"."b"::unsigned))::unsigned -> "count_896", sum(("t"."a"::unsigned))::decimal -> "sum_696") + projection (count(("t"."b"::unsigned))::unsigned -> "count_2", sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" motion [policy: segment([ref("col_1")])] - projection (sum(("sum_1596"::decimal))::decimal -> "col_1", sum(("sum_1796"::decimal))::decimal -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("sum_2"::decimal))::decimal -> "col_2") motion [policy: full] - projection (sum(("t"."b"::unsigned))::decimal -> "sum_1796", sum(("t"."a"::unsigned))::decimal -> "sum_1596") + projection (sum(("t"."b"::unsigned))::decimal -> "sum_2", 
sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2307,10 +2307,10 @@ fn front_sql_groupby_expression() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_532"::unsigned -> "col_1") - group by ("column_532"::unsigned) output: ("column_532"::unsigned -> "column_532") - motion [policy: segment([ref("column_532")])] - projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "column_532") + projection ("gr_expr_1"::unsigned -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "gr_expr_1") group by (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2328,10 +2328,10 @@ fn front_sql_groupby_expression2() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_632"::unsigned + ROW(sum(("count_1596"::unsigned))::unsigned) -> "col_1") - group by ("column_632"::unsigned) output: ("column_632"::unsigned -> "column_632", "count_1596"::unsigned -> "count_1596") - motion [policy: segment([ref("column_632")])] - projection ((ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) -> "column_632", count(("t"."a"::unsigned))::unsigned -> "count_1596") + projection ("gr_expr_1"::unsigned + ROW(sum(("count_1"::unsigned))::unsigned) -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ((ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) -> "gr_expr_1", count(("t"."a"::unsigned))::unsigned -> "count_1") group by 
((ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned))) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2349,10 +2349,10 @@ fn front_sql_groupby_expression3() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_532"::unsigned -> "col_1", "column_832"::unsigned * ROW(sum(("sum_2496"::decimal))::decimal) / ROW(sum(("count_2596"::unsigned))::unsigned) -> "col_2") - group by ("column_532"::unsigned, "column_832"::unsigned) output: ("column_532"::unsigned -> "column_532", "column_832"::unsigned -> "column_832", "count_2596"::unsigned -> "count_2596", "sum_2496"::decimal -> "sum_2496") - motion [policy: segment([ref("column_532"), ref("column_832")])] - projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "column_532", (ROW("t"."c"::unsigned) * ROW("t"."d"::unsigned)) -> "column_832", count((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned)))::unsigned -> "count_2596", sum((ROW("t"."c"::unsigned) * ROW("t"."d"::unsigned)))::decimal -> "sum_2496") + projection ("gr_expr_1"::unsigned -> "col_1", "gr_expr_2"::unsigned * ROW(sum(("sum_1"::decimal))::decimal) / ROW(sum(("count_2"::unsigned))::unsigned) -> "col_2") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "count_2"::unsigned -> "count_2", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "gr_expr_1", (ROW("t"."c"::unsigned) * ROW("t"."d"::unsigned)) -> "gr_expr_2", count((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned)))::unsigned -> "count_2", sum((ROW("t"."c"::unsigned) * ROW("t"."d"::unsigned)))::decimal -> "sum_1") group by (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned), (ROW("t"."c"::unsigned) * 
ROW("t"."d"::unsigned))) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2370,10 +2370,10 @@ fn front_sql_groupby_expression4() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_532"::unsigned -> "col_1", "column_796"::unsigned -> "a") - group by ("column_532"::unsigned, "column_796"::unsigned) output: ("column_532"::unsigned -> "column_532", "column_796"::unsigned -> "column_796") - motion [policy: segment([ref("column_532"), ref("column_796")])] - projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "column_532", "t"."a"::unsigned -> "column_796") + projection ("gr_expr_1"::unsigned -> "col_1", "gr_expr_2"::unsigned -> "a") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "gr_expr_1", "t"."a"::unsigned -> "gr_expr_2") group by (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned), "t"."a"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2394,18 +2394,18 @@ fn front_sql_groupby_with_aggregates() { projection ("t1"."a"::unsigned -> "a", "t1"."b"::unsigned -> "b", "t1"."c"::decimal -> "c", "t2"."g"::unsigned -> "g", "t2"."e"::unsigned -> "e", "t2"."f"::decimal -> "f") join on ROW("t1"."a"::unsigned, "t1"."b"::unsigned) = ROW("t2"."e"::unsigned, "t2"."g"::unsigned) scan "t1" - projection ("column_596"::unsigned -> "a", "column_696"::unsigned -> "b", sum(("sum_1596"::decimal))::decimal -> "c") - group by ("column_596"::unsigned, "column_696"::unsigned) output: ("column_596"::unsigned -> 
"column_596", "column_696"::unsigned -> "column_696", "sum_1596"::decimal -> "sum_1596") - motion [policy: segment([ref("column_596"), ref("column_696")])] - projection ("t"."a"::unsigned -> "column_596", "t"."b"::unsigned -> "column_696", sum(("t"."c"::unsigned))::decimal -> "sum_1596") + projection ("gr_expr_1"::unsigned -> "a", "gr_expr_2"::unsigned -> "b", sum(("sum_1"::decimal))::decimal -> "c") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("t"."a"::unsigned -> "gr_expr_1", "t"."b"::unsigned -> "gr_expr_2", sum(("t"."c"::unsigned))::decimal -> "sum_1") group by ("t"."a"::unsigned, "t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" motion [policy: segment([ref("e"), ref("g")])] scan "t2" - projection ("column_2496"::unsigned -> "g", "column_2596"::unsigned -> "e", sum(("sum_3496"::decimal))::decimal -> "f") - group by ("column_2496"::unsigned, "column_2596"::unsigned) output: ("column_2496"::unsigned -> "column_2496", "column_2596"::unsigned -> "column_2596", "sum_3496"::decimal -> "sum_3496") - motion [policy: segment([ref("column_2496"), ref("column_2596")])] - projection ("t2"."g"::unsigned -> "column_2496", "t2"."e"::unsigned -> "column_2596", sum(("t2"."f"::unsigned))::decimal -> "sum_3496") + projection ("gr_expr_1"::unsigned -> "g", "gr_expr_2"::unsigned -> "e", sum(("sum_1"::decimal))::decimal -> "f") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("t2"."g"::unsigned -> "gr_expr_1", "t2"."e"::unsigned -> "gr_expr_2", 
sum(("t2"."f"::unsigned))::decimal -> "sum_1") group by ("t2"."g"::unsigned, "t2"."e"::unsigned) output: ("t2"."e"::unsigned -> "e", "t2"."f"::unsigned -> "f", "t2"."g"::unsigned -> "g", "t2"."h"::unsigned -> "h", "t2"."bucket_id"::unsigned -> "bucket_id") scan "t2" execution options: @@ -2455,9 +2455,9 @@ fn front_sql_left_join_single_left() { left join on ROW("t1"."a"::decimal) = ROW("t2"."b"::unsigned) motion [policy: segment([ref("a")])] scan "t1" - projection (ROW(sum(("sum_696"::decimal))::decimal) / ROW(3::unsigned) -> "a") + projection (ROW(sum(("sum_1"::decimal))::decimal) / ROW(3::unsigned) -> "a") motion [policy: full] - projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_696") + projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_1") scan "test_space" motion [policy: full] scan "t2" @@ -2486,9 +2486,9 @@ fn front_sql_left_join_single_left2() { left join on ROW("t1"."a"::decimal) + ROW(3::unsigned) <> ROW("t2"."b"::unsigned) motion [policy: segment([ref("a")])] scan "t1" - projection (ROW(sum(("sum_696"::decimal))::decimal) / ROW(3::unsigned) -> "a") + projection (ROW(sum(("sum_1"::decimal))::decimal) / ROW(3::unsigned) -> "a") motion [policy: full] - projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_696") + projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_1") scan "test_space" motion [policy: full] scan "t2" @@ -2516,14 +2516,14 @@ fn front_sql_left_join_single_both() { projection ("t1"."a"::decimal -> "a", "t2"."b"::unsigned -> "b") left join on ROW("t1"."a"::decimal) <> ROW("t2"."b"::unsigned) scan "t1" - projection (ROW(sum(("sum_696"::decimal))::decimal) / ROW(3::unsigned) -> "a") + projection (ROW(sum(("sum_1"::decimal))::decimal) / ROW(3::unsigned) -> "a") motion [policy: full] - projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_696") + projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_1") scan "test_space" scan "t2" - projection 
(sum(("count_1496"::unsigned))::unsigned -> "b") + projection (sum(("count_1"::unsigned))::unsigned -> "b") motion [policy: full] - projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1496") + projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 @@ -2570,11 +2570,11 @@ fn front_sql_having1() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "a", sum(("sum_2196"::decimal))::decimal -> "col_1") - having ROW("column_596"::unsigned) > ROW(1::unsigned) and ROW(sum(distinct ("column_1296"::decimal))::decimal) > ROW(1::unsigned) - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_1296"::unsigned -> "column_1296", "sum_2196"::decimal -> "sum_2196") - motion [policy: segment([ref("column_596")])] - projection ("t"."a"::unsigned -> "column_596", "t"."b"::unsigned -> "column_1296", sum(("t"."b"::unsigned))::decimal -> "sum_2196") + projection ("gr_expr_1"::unsigned -> "a", sum(("sum_1"::decimal))::decimal -> "col_1") + having ROW("gr_expr_1"::unsigned) > ROW(1::unsigned) and ROW(sum(distinct ("gr_expr_2"::decimal))::decimal) > ROW(1::unsigned) + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."a"::unsigned -> "gr_expr_1", "t"."b"::unsigned -> "gr_expr_2", sum(("t"."b"::unsigned))::decimal -> "sum_1") group by ("t"."a"::unsigned, "t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2593,10 +2593,10 @@ fn front_sql_having2() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - 
projection (ROW(sum(("sum_1696"::decimal))::decimal) * ROW(count(distinct ("column_1596"::unsigned))::unsigned) -> "col_1", sum(("sum_1696"::decimal))::decimal -> "col_2") - having ROW(sum(distinct ("column_1596"::decimal))::decimal) > ROW(1::unsigned) and ROW(sum(("sum_1696"::decimal))::decimal) > ROW(1::unsigned) + projection (ROW(sum(("sum_1"::decimal))::decimal) * ROW(count(distinct ("gr_expr_1"::unsigned))::unsigned) -> "col_1", sum(("sum_1"::decimal))::decimal -> "col_2") + having ROW(sum(distinct ("gr_expr_1"::decimal))::decimal) > ROW(1::unsigned) and ROW(sum(("sum_1"::decimal))::decimal) > ROW(1::unsigned) motion [policy: full] - projection ("t"."b"::unsigned -> "column_1596", sum(("t"."a"::unsigned))::decimal -> "sum_1696") + projection ("t"."b"::unsigned -> "gr_expr_1", sum(("t"."a"::unsigned))::decimal -> "sum_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2615,10 +2615,10 @@ fn front_sql_having3() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_1396"::decimal))::decimal -> "col_1") - having ROW(sum(("sum_1396"::decimal))::decimal) > ROW(1::unsigned) + projection (sum(("sum_1"::decimal))::decimal -> "col_1") + having ROW(sum(("sum_1"::decimal))::decimal) > ROW(1::unsigned) motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1396") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2653,11 +2653,11 @@ fn front_sql_having_with_sq() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "sysFrom", sum(distinct ("column_3396"::decimal))::decimal -> "sum", count(distinct ("column_3396"::unsigned))::unsigned -> "count") - having 
ROW($0) > ROW(count(distinct ("column_3396"::unsigned))::unsigned) - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_3396"::unsigned -> "column_3396") - motion [policy: segment([ref("column_596")])] - projection ("test_space"."sysFrom"::unsigned -> "column_596", "test_space"."id"::unsigned -> "column_3396") + projection ("gr_expr_1"::unsigned -> "sysFrom", sum(distinct ("gr_expr_2"::decimal))::decimal -> "sum", count(distinct ("gr_expr_2"::unsigned))::unsigned -> "count") + having ROW($0) > ROW(count(distinct ("gr_expr_2"::unsigned))::unsigned) + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1")])] + projection ("test_space"."sysFrom"::unsigned -> "gr_expr_1", "test_space"."id"::unsigned -> "gr_expr_2") group by ("test_space"."sysFrom"::unsigned, "test_space"."id"::unsigned) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" subquery $0: @@ -2701,11 +2701,11 @@ fn front_sql_having_with_sq_segment_motion() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "sysFrom", "column_696"::unsigned -> "sys_op", sum(distinct ("column_3296"::decimal))::decimal -> "sum", count(distinct ("column_3296"::unsigned))::unsigned -> "count") - having ROW("column_596"::unsigned, "column_696"::unsigned) in ROW($0, $0) - group by ("column_596"::unsigned, "column_696"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_696"::unsigned -> "column_696", "column_3296"::unsigned -> "column_3296") - motion [policy: segment([ref("column_596"), ref("column_696")])] - projection ("test_space"."sysFrom"::unsigned -> "column_596", 
"test_space"."sys_op"::unsigned -> "column_696", "test_space"."id"::unsigned -> "column_3296") + projection ("gr_expr_1"::unsigned -> "sysFrom", "gr_expr_2"::unsigned -> "sys_op", sum(distinct ("gr_expr_3"::decimal))::decimal -> "sum", count(distinct ("gr_expr_3"::unsigned))::unsigned -> "count") + having ROW("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) in ROW($0, $0) + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "gr_expr_3"::unsigned -> "gr_expr_3") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("test_space"."sysFrom"::unsigned -> "gr_expr_1", "test_space"."sys_op"::unsigned -> "gr_expr_2", "test_space"."id"::unsigned -> "gr_expr_3") group by ("test_space"."sysFrom"::unsigned, "test_space"."sys_op"::unsigned, "test_space"."id"::unsigned) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" subquery $0: @@ -2733,11 +2733,11 @@ fn front_sql_having_with_sq_segment_local_motion() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "sysFrom", "column_696"::unsigned -> "sys_op", sum(distinct ("column_3296"::decimal))::decimal -> "sum", count(distinct ("column_3296"::unsigned))::unsigned -> "count") - having ROW("column_596"::unsigned, "column_696"::unsigned) in ROW($0, $0) - group by ("column_596"::unsigned, "column_696"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_696"::unsigned -> "column_696", "column_3296"::unsigned -> "column_3296") - motion [policy: segment([ref("column_596"), ref("column_696")])] - projection ("test_space"."sysFrom"::unsigned -> "column_596", "test_space"."sys_op"::unsigned -> "column_696", 
"test_space"."id"::unsigned -> "column_3296") + projection ("gr_expr_1"::unsigned -> "sysFrom", "gr_expr_2"::unsigned -> "sys_op", sum(distinct ("gr_expr_3"::decimal))::decimal -> "sum", count(distinct ("gr_expr_3"::unsigned))::unsigned -> "count") + having ROW("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) in ROW($0, $0) + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "gr_expr_3"::unsigned -> "gr_expr_3") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("test_space"."sysFrom"::unsigned -> "gr_expr_1", "test_space"."sys_op"::unsigned -> "gr_expr_2", "test_space"."id"::unsigned -> "gr_expr_3") group by ("test_space"."sysFrom"::unsigned, "test_space"."sys_op"::unsigned, "test_space"."id"::unsigned) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" subquery $0: @@ -2760,9 +2760,9 @@ fn front_sql_unique_local_aggregates() { println!("{}", plan.as_explain().unwrap()); // here we must compute only two aggregates at local stage: sum(a), count(a) insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_696"::decimal))::decimal -> "col_1", sum(("count_896"::unsigned))::unsigned -> "col_2", ROW(sum(("sum_696"::decimal))::decimal) + ROW(sum(("count_896"::unsigned))::unsigned) -> "col_3") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2", ROW(sum(("sum_1"::decimal))::decimal) + ROW(sum(("count_2"::unsigned))::unsigned) -> "col_3") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_696", count(("t"."a"::unsigned))::unsigned -> "count_896") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1", count(("t"."a"::unsigned))::unsigned -> 
"count_2") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2782,10 +2782,10 @@ fn front_sql_unique_local_groupings() { println!("{}", plan.as_explain().unwrap()); // here we must compute only two groupby columns at local stage: a, b insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(distinct ("column_1196"::decimal))::decimal -> "col_1", count(distinct ("column_1196"::unsigned))::unsigned -> "col_2", count(distinct ("column_596"::unsigned))::unsigned -> "col_3") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_1196"::unsigned -> "column_1196") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", "t"."a"::unsigned -> "column_1196") + projection (sum(distinct ("gr_expr_2"::decimal))::decimal -> "col_1", count(distinct ("gr_expr_2"::unsigned))::unsigned -> "col_2", count(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_3") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", "t"."a"::unsigned -> "gr_expr_2") group by ("t"."b"::unsigned, "t"."a"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2841,10 +2841,10 @@ fn front_sql_select_distinct() { println!("{}", plan.as_explain().unwrap()); // here we must compute only two groupby columns at local stage: a, b insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_896"::unsigned -> "a", "column_832"::unsigned -> "col_1") - group by ("column_896"::unsigned, "column_832"::unsigned) output: ("column_896"::unsigned -> "column_896", "column_832"::unsigned -> "column_832") - motion [policy: segment([ref("column_896"), ref("column_832")])] - projection ("t"."a"::unsigned -> 
"column_896", ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "column_832") + projection ("gr_expr_1"::unsigned -> "a", "gr_expr_2"::unsigned -> "col_1") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("t"."a"::unsigned -> "gr_expr_1", ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "gr_expr_2") group by ("t"."a"::unsigned, ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2861,10 +2861,10 @@ fn front_sql_select_distinct_asterisk() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_996"::unsigned -> "a", "column_1096"::unsigned -> "b", "column_1196"::unsigned -> "c", "column_1296"::unsigned -> "d") - group by ("column_996"::unsigned, "column_1096"::unsigned, "column_1196"::unsigned, "column_1296"::unsigned) output: ("column_996"::unsigned -> "column_996", "column_1096"::unsigned -> "column_1096", "column_1196"::unsigned -> "column_1196", "column_1296"::unsigned -> "column_1296") - motion [policy: segment([ref("column_996"), ref("column_1096"), ref("column_1196"), ref("column_1296")])] - projection ("t"."a"::unsigned -> "column_996", "t"."b"::unsigned -> "column_1096", "t"."c"::unsigned -> "column_1196", "t"."d"::unsigned -> "column_1296") + projection ("gr_expr_1"::unsigned -> "a", "gr_expr_2"::unsigned -> "b", "gr_expr_3"::unsigned -> "c", "gr_expr_4"::unsigned -> "d") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned, "gr_expr_3"::unsigned, "gr_expr_4"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "gr_expr_3"::unsigned -> "gr_expr_3", "gr_expr_4"::unsigned -> "gr_expr_4") + 
motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2"), ref("gr_expr_3"), ref("gr_expr_4")])] + projection ("t"."a"::unsigned -> "gr_expr_1", "t"."b"::unsigned -> "gr_expr_2", "t"."c"::unsigned -> "gr_expr_3", "t"."d"::unsigned -> "gr_expr_4") group by ("t"."a"::unsigned, "t"."b"::unsigned, "t"."c"::unsigned, "t"."d"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2898,10 +2898,10 @@ fn front_sql_select_distinct_with_aggr() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_1296"::decimal))::decimal -> "col_1", "column_596"::unsigned -> "b") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "sum_1296"::decimal -> "sum_1296") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", sum(("t"."a"::unsigned))::decimal -> "sum_1296") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", "gr_expr_1"::unsigned -> "b") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", sum(("t"."a"::unsigned))::decimal -> "sum_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2918,9 +2918,9 @@ fn front_sql_select_distinct_with_aggr2() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_696"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_696") + projection 
(sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -3244,9 +3244,9 @@ fn front_sql_update6() { subquery $0: motion [policy: full] scan - projection (sum(("sum_796"::decimal))::decimal -> "s") + projection (sum(("sum_1"::decimal))::decimal -> "s") motion [policy: full] - projection (sum(("t3"."b"::integer))::decimal -> "sum_796") + projection (sum(("t3"."b"::integer))::decimal -> "sum_1") scan "t3" execution options: sql_vdbe_opcode_max = 45000 @@ -3745,10 +3745,10 @@ fn front_subqueries_interpreted_as_expression_under_group_by() { println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("count_1496"::unsigned))::unsigned -> "col_1") - group by ("column_932"::unsigned) output: ("column_932"::unsigned -> "column_932", "count_1496"::unsigned -> "count_1496") - motion [policy: segment([ref("column_932")])] - projection (ROW("test_space"."id"::unsigned) + ROW($1) -> "column_932", count((*::integer))::unsigned -> "count_1496") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW("test_space"."id"::unsigned) + ROW($1) -> "gr_expr_1", count((*::integer))::unsigned -> "count_1") group by (ROW("test_space"."id"::unsigned) + ROW($0)) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" subquery $0: @@ -3788,9 +3788,9 @@ fn front_select_without_scan_2() { subquery $0: motion [policy: full] scan - projection (sum(("count_796"::unsigned))::unsigned -> "col_1") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1") motion [policy: full] - projection 
(count((*::integer))::unsigned -> "count_796") + projection (count((*::integer))::unsigned -> "count_1") scan "t2" subquery $1: scan diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/global.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/global.rs index 880d3466e0..7d537e2a89 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/global.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/global.rs @@ -86,9 +86,9 @@ fn front_sql_global_tbl_sq1() { scan "global_t" subquery $0: scan - projection (sum(("sum_1596"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1596") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" subquery $1: motion [policy: full] @@ -120,9 +120,9 @@ fn front_sql_global_tbl_multiple_sqs1() { scan "global_t" subquery $0: scan - projection (sum(("sum_1796"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1796") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" subquery $1: scan @@ -155,9 +155,9 @@ fn front_sql_global_tbl_multiple_sqs2() { scan "global_t" subquery $0: scan - projection (sum(("sum_1796"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1796") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" subquery $1: motion [policy: full] @@ -469,9 +469,9 @@ fn front_sql_global_join4() { projection ("s"."e"::decimal -> "e") left join on true::boolean scan "s" - projection (sum(("sum_696"::decimal))::decimal -> "e") + projection (sum(("sum_1"::decimal))::decimal -> "e") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_696") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" 
scan "global_t" projection ("global_t"."a"::integer -> "a", "global_t"."b"::integer -> "b") @@ -500,9 +500,9 @@ fn front_sql_global_join5() { projection ("global_t"."a"::integer -> "a", "global_t"."b"::integer -> "b") scan "global_t" scan "s" - projection (sum(("sum_896"::decimal))::decimal -> "e") + projection (sum(("sum_1"::decimal))::decimal -> "e") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_896") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" execution options: sql_vdbe_opcode_max = 45000 @@ -761,11 +761,11 @@ fn front_sql_global_aggregate5() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_1432"::integer -> "col_1", sum(("sum_2896"::decimal))::decimal -> "col_2") - having ROW(sum(("sum_2296"::decimal::double))::decimal / sum(("count_2296"::decimal::double))::decimal) > ROW(3::unsigned) - group by ("column_1432"::integer) output: ("column_1432"::integer -> "column_1432", "sum_2296"::decimal -> "sum_2296", "count_2296"::unsigned -> "count_2296", "sum_2896"::decimal -> "sum_2896") - motion [policy: segment([ref("column_1432")])] - projection (ROW("global_t"."b"::integer) + ROW("global_t"."a"::integer) -> "column_1432", sum(("global_t"."b"::integer))::decimal -> "sum_2296", count(("global_t"."b"::integer))::unsigned -> "count_2296", sum(("global_t"."a"::integer))::decimal -> "sum_2896") + projection ("gr_expr_1"::integer -> "col_1", sum(("sum_1"::decimal))::decimal -> "col_2") + having ROW(sum(("avg_2"::decimal::double))::decimal / sum(("avg_3"::decimal::double))::decimal) > ROW(3::unsigned) + group by ("gr_expr_1"::integer) output: ("gr_expr_1"::integer -> "gr_expr_1", "avg_2"::decimal -> "avg_2", "avg_3"::unsigned -> "avg_3", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW("global_t"."b"::integer) + ROW("global_t"."a"::integer) -> "gr_expr_1", 
sum(("global_t"."b"::integer))::decimal -> "avg_2", count(("global_t"."b"::integer))::unsigned -> "avg_3", sum(("global_t"."a"::integer))::decimal -> "sum_1") group by (ROW("global_t"."b"::integer) + ROW("global_t"."a"::integer)) output: ("global_t"."a"::integer -> "a", "global_t"."b"::integer -> "b") selection ROW("global_t"."a"::integer, "global_t"."b"::integer) in ROW($0, $0) scan "global_t" @@ -992,9 +992,9 @@ fn front_sql_global_union_all3() { projection ("global_t"."a"::integer -> "a") scan "global_t" motion [policy: segment([ref("col_1")])] - projection (sum(("sum_996"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_996") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" motion [policy: local] projection ("global_t"."b"::integer -> "b") @@ -1093,9 +1093,9 @@ fn front_sql_global_union2() { projection ("global_t"."a"::integer -> "a") scan "global_t" motion [policy: segment([ref("col_1")])] - projection (sum(("sum_996"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_996") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" execution options: sql_vdbe_opcode_max = 45000 @@ -1232,9 +1232,9 @@ fn check_plan_except_global_vs_single() { except projection ("global_t"."a"::integer -> "a") scan "global_t" - projection (sum(("sum_996"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_996") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" execution options: sql_vdbe_opcode_max = 45000 @@ -1254,9 +1254,9 @@ fn check_plan_except_single_vs_global() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" except - projection 
(sum(("sum_696"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_696") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" projection ("global_t"."a"::integer -> "a") scan "global_t" diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/like.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/like.rs index 12b61c11de..19b0f36f27 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/like.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/like.rs @@ -90,10 +90,10 @@ fn like_explain3() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_332"::boolean -> "col_1") - group by ("column_332"::boolean) output: ("column_332"::boolean -> "column_332") - motion [policy: segment([ref("column_332")])] - projection (ROW("t1"."a"::string) LIKE ROW("t1"."a"::string) ESCAPE ROW('\'::string) -> "column_332") + projection ("gr_expr_1"::boolean -> "col_1") + group by ("gr_expr_1"::boolean) output: ("gr_expr_1"::boolean -> "gr_expr_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW("t1"."a"::string) LIKE ROW("t1"."a"::string) ESCAPE ROW('\'::string) -> "gr_expr_1") group by (ROW("t1"."a"::string) LIKE ROW("t1"."a"::string) ESCAPE ROW('\'::string)) output: ("t1"."a"::string -> "a", "t1"."bucket_id"::unsigned -> "bucket_id", "t1"."b"::integer -> "b") scan "t1" execution options: @@ -140,10 +140,10 @@ fn ilike_explain() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_332"::boolean -> "col_1") - group by ("column_332"::boolean) output: ("column_332"::boolean -> "column_332") - motion [policy: segment([ref("column_332")])] - projection (ROW(lower(("t1"."a"::string))::string) LIKE ROW(lower(("t1"."a"::string))::string) ESCAPE ROW('x'::string) -> "column_332") + projection 
("gr_expr_1"::boolean -> "col_1") + group by ("gr_expr_1"::boolean) output: ("gr_expr_1"::boolean -> "gr_expr_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW(lower(("t1"."a"::string))::string) LIKE ROW(lower(("t1"."a"::string))::string) ESCAPE ROW('x'::string) -> "gr_expr_1") group by (ROW(lower(("t1"."a"::string))::string) LIKE ROW(lower(("t1"."a"::string))::string) ESCAPE ROW('x'::string)) output: ("t1"."a"::string -> "a", "t1"."bucket_id"::unsigned -> "bucket_id", "t1"."b"::integer -> "b") scan "t1" execution options: diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs index b5a5cba46f..82874b855a 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs @@ -51,9 +51,9 @@ fn aggregate() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" limit 1 - projection (min(("min_696"::unsigned))::unsigned -> "col_1", min(distinct ("column_796"::unsigned))::unsigned -> "col_2") + projection (min(("min_1"::unsigned))::unsigned -> "col_1", min(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection ("t"."b"::unsigned -> "column_796", min(("t"."b"::unsigned))::unsigned -> "min_696") + projection ("t"."b"::unsigned -> "gr_expr_1", min(("t"."b"::unsigned))::unsigned -> "min_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -72,10 +72,10 @@ fn group_by() { limit 555 motion [policy: full] limit 555 - projection (sum(("count_1196"::unsigned))::unsigned -> "col_1", "column_596"::unsigned -> "b") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "count_1196"::unsigned -> "count_1196") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", 
count((*::integer))::unsigned -> "count_1196") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1", "gr_expr_1"::unsigned -> "b") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", count((*::integer))::unsigned -> "count_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: diff --git a/sbroad/sbroad-core/src/ir/aggregates.rs b/sbroad/sbroad-core/src/ir/aggregates.rs index 924e5dd0d3..326c6b46be 100644 --- a/sbroad/sbroad-core/src/ir/aggregates.rs +++ b/sbroad/sbroad-core/src/ir/aggregates.rs @@ -1,23 +1,31 @@ +use ahash::AHashMap; use smol_str::{format_smolstr, ToSmolStr}; use crate::errors::{Entity, SbroadError}; use crate::ir::expression::cast::Type; +use crate::ir::helpers::RepeatableState; use crate::ir::node::{NodeId, Reference, StableFunction}; use crate::ir::operator::Arithmetic; use crate::ir::relation::Type as RelType; use crate::ir::Plan; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fmt::{Display, Formatter}; +use std::hash::{Hash, Hasher}; use std::rc::Rc; -use super::expression::{ColumnPositionMap, FunctionFeature, Position}; +use super::expression::{ + ColumnPositionMap, Comparator, FunctionFeature, Position, EXPR_HASH_DEPTH, +}; +use super::function::{Behavior, Function}; use super::node::expression::Expression; +use super::node::relational::Relational; +use super::node::{Having, Projection}; use super::relation::DerivedType; use crate::frontend::sql::ir::SubtreeCloner; -/// The kind of aggregate function +/// The kind of aggregate function. /// -/// Examples: avg, sum, count, .. +/// Examples: avg, sum, count. 
#[derive(Clone, Debug, Hash, Eq, PartialEq, Copy)] pub enum AggregateKind { COUNT, @@ -45,23 +53,25 @@ impl Display for AggregateKind { } impl AggregateKind { + /// Returns None in case passed function name is not aggregate. #[must_use] - pub fn new(name: &str) -> Option<AggregateKind> { - let normalized = name.to_lowercase(); - match normalized.as_str() { - "count" => Some(AggregateKind::COUNT), - "sum" => Some(AggregateKind::SUM), - "avg" => Some(AggregateKind::AVG), - "total" => Some(AggregateKind::TOTAL), - "min" => Some(AggregateKind::MIN), - "max" => Some(AggregateKind::MAX), - "group_concat" | "string_agg" => Some(AggregateKind::GRCONCAT), - _ => None, - } + pub fn from_name(func_name: &str) -> Option<AggregateKind> { + let normalized = func_name.to_lowercase(); + let kind = match normalized.as_str() { + "count" => AggregateKind::COUNT, + "sum" => AggregateKind::SUM, + "avg" => AggregateKind::AVG, + "total" => AggregateKind::TOTAL, + "min" => AggregateKind::MIN, + "max" => AggregateKind::MAX, + "group_concat" | "string_agg" => AggregateKind::GRCONCAT, + _ => return None, + }; + Some(kind) } - #[inline(always)] - pub fn to_type(self, plan: &Plan, args: &[NodeId]) -> Result<DerivedType, SbroadError> { + /// Get type of the corresponding aggregate function. + pub fn get_type(self, plan: &Plan, args: &[NodeId]) -> Result<DerivedType, SbroadError> { let ty = match self { AggregateKind::COUNT => RelType::Unsigned, @@ -79,6 +89,9 @@ impl AggregateKind { Ok(DerivedType::new(ty)) } + /// Get aggregate functions that must be present on the local (Map) stage + /// of two stage aggregation in order to calculate given aggregate (`self`) + /// on the reduce stage. 
#[must_use] pub fn get_local_aggregates_kinds(&self) -> Vec<AggregateKind> { match self { @@ -92,34 +105,13 @@ impl AggregateKind { } } - /// Calculate argument type of aggregate function - /// - /// # Errors - /// - Invalid index - /// - Node doesn't exist in the plan - /// - Node is not an expression type - pub fn get_arg_type( - idx: usize, - plan: &Plan, - args: &[NodeId], - ) -> Result<DerivedType, SbroadError> { - let arg_id = *args.get(idx).ok_or(SbroadError::NotFound( - Entity::Index, - format_smolstr!("no element at index {idx} in args {args:?}"), - ))?; - let expr = plan.get_expression_node(arg_id)?; - expr.calculate_type(plan) - } - - /// Check agruments types of aggregate function - /// - /// # Errors - /// - Invlid plan/aggregate - /// - Invalid argument type - /// - /// # Panics - /// - Invalid argument count for aggregate + /// Check that aggregate function arguments have expected types. pub fn check_args_types(&self, plan: &Plan, args: &[NodeId]) -> Result<(), SbroadError> { + let get_arg_type = |idx: usize, args: &[NodeId]| { + let expr = plan.get_expression_node(args[idx])?; + expr.calculate_type(plan) + }; + let err = |arg_type: &RelType| -> Result<(), SbroadError> { Err(SbroadError::Invalid( Entity::Query, @@ -131,7 +123,7 @@ impl AggregateKind { }; match self { AggregateKind::SUM | AggregateKind::AVG | AggregateKind::TOTAL => { - let arg_type = Self::get_arg_type(0, plan, args)?; + let arg_type = get_arg_type(0, args)?; let Some(arg_type) = arg_type.get() else { return Ok(()); }; @@ -143,7 +135,7 @@ impl AggregateKind { } } AggregateKind::MIN | AggregateKind::MAX => { - let arg_type = Self::get_arg_type(0, plan, args)?; + let arg_type = get_arg_type(0, args)?; let Some(arg_type) = arg_type.get() else { return Ok(()); }; @@ -152,12 +144,12 @@ impl AggregateKind { } } AggregateKind::GRCONCAT => { - let arg_type_first = Self::get_arg_type(0, plan, args)?; + let arg_type_first = get_arg_type(0, args)?; let Some(first_type) = arg_type_first.get() 
else { return Ok(()); }; if args.len() == 2 { - let arg_type_second = Self::get_arg_type(1, plan, args)?; + let arg_type_second = get_arg_type(1, args)?; let Some(second_type) = arg_type_second.get() else { return Ok(()); }; @@ -180,10 +172,9 @@ impl AggregateKind { Ok(()) } - /// Get final aggregate corresponding to given local aggregate - /// - /// # Errors - /// - Invalid combination of this aggregate and local aggregate + /// Get final aggregate corresponding to given local aggregate. + /// 1) Checks that `local_aggregate` and final `self` aggregate corresponds to each other + /// 2) Gets type of final aggregate pub fn get_final_aggregate_kind( &self, local_aggregate: &AggregateKind, @@ -208,14 +199,21 @@ impl AggregateKind { } } -/// Helper struct for adding aggregates to ir -/// -/// This struct can be used for adding any Tarantool aggregate: -/// avg, sum, count, min, max, total -#[derive(Debug, Clone)] -pub struct SimpleAggregate { - /// The aggregate function being added, like COUNT +/// Pair of (aggregate kind, its position in the output). +pub(crate) type PositionKind = (Position, AggregateKind); + +/// Metadata about aggregates. +#[derive(Clone, Debug)] +pub struct Aggregate { + /// Id of Relational node in which this aggregate is located. + /// It can be located in `Projection`, `Having`, `OrderBy`. + pub parent_rel: NodeId, + /// Id of parent expression of aggregate function. + pub parent_expr: NodeId, + /// The aggregate function being added, like COUNT, SUM, etc. pub kind: AggregateKind, + /// "local aggregate aliases". + /// /// For non-distinct aggregate maps local aggregate kind to /// corresponding local alias. 
For distinct aggregate maps /// its aggregate kind to local alias used for corresponding @@ -233,79 +231,116 @@ pub struct SimpleAggregate { /// original query: `select avg(distinct b) from t` /// map query: `select b as l1 from t group by b)` /// map will contain: `avg` -> `l1` - pub lagg_alias: HashMap<AggregateKind, Rc<String>>, - /// id of aggregate function in IR + pub lagg_aliases: AHashMap<AggregateKind, Rc<String>>, + /// Id of aggregate function in plan. pub fun_id: NodeId, + /// Whether this aggregate was marked distinct in original user query + pub is_distinct: bool, } -#[cfg(not(feature = "mock"))] -#[must_use] -pub fn generate_local_alias_for_aggr(kind: &AggregateKind, suffix: &str) -> String { - format!( - "{}_{kind}_{suffix}", - uuid::Uuid::new_v4().as_simple().to_string() - ) -} - -#[cfg(feature = "mock")] -#[must_use] -pub fn generate_local_alias_for_aggr(kind: &AggregateKind, suffix: &str) -> String { - format!("{kind}_{suffix}") -} - -impl SimpleAggregate { +impl Aggregate { #[must_use] - pub fn new(name: &str, fun_id: NodeId) -> Option<SimpleAggregate> { - let kind = AggregateKind::new(name)?; - let laggr_alias: HashMap<AggregateKind, Rc<String>> = HashMap::new(); - let aggr = SimpleAggregate { + pub fn from_name( + name: &str, + fun_id: NodeId, + parent_rel: NodeId, + parent_expr: NodeId, + is_distinct: bool, + ) -> Option<Self> { + let kind = AggregateKind::from_name(name)?; + let aggr = Self { kind, fun_id, - lagg_alias: laggr_alias, + lagg_aliases: AHashMap::with_capacity(2), + parent_rel, + parent_expr, + is_distinct, }; Some(aggr) } -} - -pub(crate) type PositionKind = (Position, AggregateKind); -impl SimpleAggregate { pub(crate) fn get_position_kinds( &self, alias_to_pos: &ColumnPositionMap, - is_distinct: bool, ) -> Result<Vec<PositionKind>, SbroadError> { - if is_distinct { - let local_alias = self.lagg_alias.get(&self.kind).ok_or_else(|| { - SbroadError::Invalid( - Entity::Aggregate, - Some(format_smolstr!( - "missing local alias for 
distinct aggregate: {self:?}" - )), - ) - })?; - let position = alias_to_pos.get(local_alias)?; - Ok(vec![(position, self.kind)]) + let res = if self.is_distinct { + // For distinct aggregates kinds of + // local and final aggregates are the same. + let local_alias = self + .lagg_aliases + .get(&self.kind) + .expect("missing local alias for distinct aggregate: {self:?}"); + let pos = alias_to_pos.get(local_alias)?; + vec![(pos, self.kind)] } else { let aggr_kinds = self.kind.get_local_aggregates_kinds(); let mut res = Vec::with_capacity(aggr_kinds.len()); for aggr_kind in aggr_kinds { - let local_alias = self.lagg_alias.get(&aggr_kind).ok_or_else(|| { - SbroadError::Invalid( - Entity::Aggregate, - Some(format_smolstr!( - "missing local alias for local aggregate ({aggr_kind}): {self:?}" - )), - ) - })?; - let position = alias_to_pos.get(local_alias)?; - res.push((position, aggr_kind)); + let local_alias = self + .lagg_aliases + .get(&aggr_kind) + .expect("missing local alias for local aggregate ({aggr_kind}): {self:?}"); + let pos = alias_to_pos.get(local_alias)?; + res.push((pos, aggr_kind)); } - Ok(res) - } + res + }; + Ok(res) } - /// Create final aggregate expression and return its id + fn create_final_aggr( + &self, + plan: &mut Plan, + position: Position, + final_kind: AggregateKind, + ) -> Result<NodeId, SbroadError> { + let fun_expr = plan.get_expression_node(self.fun_id)?; + let col_type = fun_expr.calculate_type(plan)?; + + let ref_node = Reference { + parent: Some(self.parent_rel), + // Final node has only one required child. + targets: Some(vec![0]), + position, + col_type, + asterisk_source: None, + }; + let ref_id = plan.nodes.push(ref_node.into()); + let children: Vec<NodeId> = match self.kind { + AggregateKind::AVG => vec![plan.add_cast(ref_id, Type::Double)?], + AggregateKind::GRCONCAT => { + let Expression::StableFunction(StableFunction { children, .. }) = + plan.get_expression_node(self.fun_id)? 
+ else { + unreachable!("Aggregate should reference expression by fun_id") + }; + + if let Some(delimiter_id) = children.get(1) { + vec![ref_id, SubtreeCloner::clone_subtree(plan, *delimiter_id)?] + } else { + vec![ref_id] + } + } + _ => vec![ref_id], + }; + let feature = if self.is_distinct { + Some(FunctionFeature::Distinct) + } else { + None + }; + let func_type = self.kind.get_type(plan, &children)?; + let final_aggr = StableFunction { + name: final_kind.to_smolstr(), + children, + feature, + func_type, + is_system: true, + }; + let aggr_id = plan.nodes.push(final_aggr.into()); + Ok(aggr_id) + } + + /// Create final aggregate expression and return its id. /// /// # Examples /// Suppose this aggregate is non-distinct `AVG` and at local stage @@ -325,121 +360,297 @@ impl SimpleAggregate { /// ```txt /// avg(column_1) /// ``` - /// - /// # Errors - /// - Invalid aggregate - /// - Could not find local alias position in child output #[allow(clippy::too_many_lines)] pub(crate) fn create_final_aggregate_expr( &self, - parent: NodeId, plan: &mut Plan, - fun_type: DerivedType, - mut position_kinds: Vec<PositionKind>, - is_distinct: bool, + position_kinds: Vec<PositionKind>, ) -> Result<NodeId, SbroadError> { - // map local AggregateKind to finalised expression of that aggregate - let mut final_aggregates: HashMap<AggregateKind, NodeId> = HashMap::new(); - let mut create_final_aggr = |position: Position, - local_kind: AggregateKind, - final_func: AggregateKind| - -> Result<(), SbroadError> { - let ref_node = Reference { - parent: Some(parent), - // projection has only one child - targets: Some(vec![0]), - position, - col_type: fun_type, - asterisk_source: None, - }; - let ref_id = plan.nodes.push(ref_node.into()); - let children = match self.kind { - AggregateKind::AVG => vec![plan.add_cast(ref_id, Type::Double)?], - AggregateKind::GRCONCAT => { - if let Expression::StableFunction(StableFunction { children, .. }) = - plan.get_expression_node(self.fun_id)? 
- { - if children.len() > 1 { - let second_arg = { - let a = *children - .get(1) - .ok_or(SbroadError::Invalid(Entity::Aggregate, None))?; - SubtreeCloner::clone_subtree(plan, a)? - }; - vec![ref_id, second_arg] - } else { - vec![ref_id] - } - } else { - return Err(SbroadError::Invalid( - Entity::Aggregate, - Some(format_smolstr!( - "fun_id ({:?}) points to other expression node", - self.fun_id - )), - )); - } - } - _ => vec![ref_id], - }; - let feature = if is_distinct { - Some(FunctionFeature::Distinct) - } else { - None - }; - let func_type = self.kind.to_type(plan, &children)?; - let final_aggr = StableFunction { - name: final_func.to_smolstr(), - children, - feature, - func_type, - is_system: true, - }; - let aggr_id = plan.nodes.push(final_aggr.into()); - final_aggregates.insert(local_kind, aggr_id); - Ok(()) - }; - if is_distinct { - let (position, kind) = position_kinds.drain(..).next().ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues("position kinds are empty".to_smolstr()) - })?; - create_final_aggr(position, kind, self.kind)?; + // Map of {local AggregateKind -> finalized expression of that aggregate}. + let mut final_aggregates: HashMap<AggregateKind, NodeId> = + HashMap::with_capacity(AGGR_CAPACITY); + + if self.is_distinct { + // For distinct aggregates kinds of local and final aggregates are the same. 
+ let (position, local_kind) = position_kinds + .first() + .expect("Distinct aggregate should have the only position kind"); + let aggr_id = self.create_final_aggr(plan, *position, self.kind)?; + final_aggregates.insert(*local_kind, aggr_id); } else { - for (position, kind) in position_kinds { - let final_aggregate_kind = self.kind.get_final_aggregate_kind(&kind)?; - create_final_aggr(position, kind, final_aggregate_kind)?; + for (position, local_kind) in position_kinds { + let final_aggregate_kind = self.kind.get_final_aggregate_kind(&local_kind)?; + let aggr_id = self.create_final_aggr(plan, position, final_aggregate_kind)?; + final_aggregates.insert(local_kind, aggr_id); } } + let final_expr_id = if final_aggregates.len() == 1 { - *final_aggregates.values().next().ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues("final_aggregates is empty".into()) - })? + *final_aggregates.values().next().unwrap() } else { match self.kind { AggregateKind::AVG => { - let sum_aggr = *final_aggregates.get(&AggregateKind::SUM).ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues( - "final_aggregates: missing final aggregate for SUM".into(), - ) - })?; - let count_aggr = - *final_aggregates.get(&AggregateKind::COUNT).ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues( - "final_aggregates: missing final aggregate for COUNT".into(), - ) - })?; + let sum_aggr = *final_aggregates + .get(&AggregateKind::SUM) + .expect("SUM aggregate expr should exist for final AVG"); + let count_aggr = *final_aggregates + .get(&AggregateKind::COUNT) + .expect("COUNT aggregate expr should exist for final AVG"); plan.add_arithmetic_to_plan(sum_aggr, Arithmetic::Divide, count_aggr)? 
} _ => { - return Err(SbroadError::Unsupported( - Entity::Aggregate, - Some(format_smolstr!( - "aggregate with multiple final aggregates: {self:?}" - )), - )) + unreachable!("The only aggregate with multiple final aggregates is AVG") } } }; Ok(final_expr_id) } } + +/// Capacity for the vec of aggregates which we expect to extract +/// from final nodes like Projection and Having. +const AGGR_CAPACITY: usize = 10; + +/// Helper struct to find aggregates in expressions of finals. +struct AggrCollector<'plan> { + /// Id of final node in which matches are searched. + parent_rel: NodeId, + /// Collected aggregates. + aggrs: Vec<Aggregate>, + plan: &'plan Plan, +} + +impl<'plan> AggrCollector<'plan> { + pub fn with_capacity( + plan: &'plan Plan, + capacity: usize, + parent_rel: NodeId, + ) -> AggrCollector<'plan> { + AggrCollector { + aggrs: Vec::with_capacity(capacity), + parent_rel, + plan, + } + } + + /// Collect aggregates in internal field by traversing expression tree `top` + /// + /// # Arguments + /// * `top` - id of expression root in which to look for aggregates + /// * `parent_rel` - id of parent relational node, where `top` is located. It is used to + /// create `AggrInfo` + pub fn collect_aggregates(&mut self, top: NodeId) -> Result<Vec<Aggregate>, SbroadError> { + self.find(top, None)?; + Ok(std::mem::take(&mut self.aggrs)) + } + + fn find(&mut self, current: NodeId, parent_expr: Option<NodeId>) -> Result<(), SbroadError> { + let expr = self.plan.get_expression_node(current)?; + if let Expression::StableFunction(StableFunction { name, feature, .. 
}) = expr { + let is_distinct = matches!(feature, Some(FunctionFeature::Distinct)); + let parent_expr = parent_expr.expect( + "Aggregate stable function under final relational node should have a parent expr", + ); + if let Some(aggr) = + Aggregate::from_name(name, current, self.parent_rel, parent_expr, is_distinct) + { + self.aggrs.push(aggr); + return Ok(()); + }; + } + for child in self.plan.nodes.expr_iter(current, false) { + self.find(child, Some(current))?; + } + Ok(()) + } +} + +/// Helper struct to filter duplicate aggregates in local stage. +/// +/// Consider user query: `select sum(a), avg(a) from t` +/// at local stage we need to compute `sum(a)` only once. +/// +/// This struct contains info needed to compute hash and compare aggregates +/// used at local stage. +struct AggregateSignature<'plan> { + pub kind: AggregateKind, + /// Ids of expressions used as arguments to aggregate. + pub arguments: Vec<NodeId>, + pub plan: &'plan Plan, + /// Local alias of this local aggregate. + pub local_alias: Rc<String>, +} + +impl<'plan> Hash for AggregateSignature<'plan> { + fn hash<H: Hasher>(&self, state: &mut H) { + self.kind.hash(state); + let mut comp = Comparator::new(self.plan); + comp.set_hasher(state); + for arg in &self.arguments { + comp.hash_for_expr(*arg, EXPR_HASH_DEPTH); + } + } +} + +impl<'plan> PartialEq<Self> for AggregateSignature<'plan> { + fn eq(&self, other: &Self) -> bool { + let comparator = Comparator::new(self.plan); + self.kind == other.kind + && self + .arguments + .iter() + .zip(other.arguments.iter()) + .all(|(l, r)| comparator.are_subtrees_equal(*l, *r).unwrap_or(false)) + } +} + +impl<'plan> Eq for AggregateSignature<'plan> {} + +fn aggr_local_alias(kind: AggregateKind, index: usize) -> String { + format!("{kind}_{index}") +} + +impl Plan { + /// Collect information about aggregates. + /// + /// Aggregates can appear in `Projection`, `Having`. + /// TODO: We should also support OrderBy. 
+ /// + /// # Arguments + /// [`finals`] - ids of nodes in final (reduce stage) before adding two stage aggregation. + /// It may contain ids of `Projection`, `Having` or `NamedWindows`. + /// Note: final `GroupBy` is not present because it will be added later in 2-stage pipeline. + pub fn collect_aggregates(&self, finals: &Vec<NodeId>) -> Result<Vec<Aggregate>, SbroadError> { + let mut aggrs = Vec::with_capacity(AGGR_CAPACITY); + for node_id in finals { + let node = self.get_relation_node(*node_id)?; + match node { + Relational::Projection(Projection { output, .. }) => { + let mut collector = AggrCollector::with_capacity(self, AGGR_CAPACITY, *node_id); + for col in self.get_row_list(*output)? { + aggrs.extend(collector.collect_aggregates(*col)?); + } + } + Relational::Having(Having { filter, .. }) => { + let mut collector = AggrCollector::with_capacity(self, AGGR_CAPACITY, *node_id); + aggrs.extend(collector.collect_aggregates(*filter)?); + } + _ => { + unreachable!( + "Unexpected {node:?} met as final relational to collect aggregates" + ) + } + } + } + + for aggr in &aggrs { + let top = aggr.fun_id; + if self.contains_aggregates(top, false)? 
{ + return Err(SbroadError::Invalid( + Entity::Query, + Some("aggregate functions inside aggregate function are not allowed.".into()), + )); + } + } + + Ok(aggrs) + } + + pub fn create_local_aggregate( + &mut self, + kind: AggregateKind, + arguments: &[NodeId], + local_alias: &str, + ) -> Result<NodeId, SbroadError> { + let fun: Function = Function { + name: kind.to_smolstr(), + behavior: Behavior::Stable, + func_type: kind.get_type(self, arguments)?, + is_system: true, + }; + // We can reuse aggregate expression between local aggregates, because + // all local aggregates are located inside the same motion subtree and we + // assume that each local aggregate does not need to modify its expression + let local_fun_id = self.add_stable_function(&fun, arguments.to_vec(), None)?; + let alias_id = self.nodes.add_alias(local_alias, local_fun_id)?; + Ok(alias_id) + } + + /// Adds aggregates columns in `output_cols` for local `Projection` + /// + /// This function collects local aggregates from each `Aggregate`, + /// then it removes duplicates from them using `AggregateSignature`. + /// Next, it creates for each unique aggregate local alias and column. + #[allow(clippy::mutable_key_type)] + pub fn add_local_aggregates( + &mut self, + aggrs: &mut [Aggregate], + output_cols: &mut Vec<NodeId>, + ) -> Result<(), SbroadError> { + let mut local_alias_index = 1; + + // Aggregate expressions can appear in `Projection`, `Having`, `OrderBy`, if the + // same expression appears in different places, we must not calculate it separately: + // `select sum(a) from t group by b having sum(a) > 10` + // Here `sum(a)` appears both in projection and having, so we need to calculate it only once. 
+ let mut unique_local_aggregates: HashSet<AggregateSignature, RepeatableState> = + HashSet::with_hasher(RepeatableState); + for pos in 0..aggrs.len() { + let (final_kind, arguments, aggr_kinds) = { + let aggr: &Aggregate = aggrs.get(pos).unwrap(); + if aggr.is_distinct { + continue; + } + + let Expression::StableFunction(StableFunction { + children: arguments, + .. + }) = self.get_expression_node(aggr.fun_id)? + else { + unreachable!("Aggregate should reference StableFunction by fun_id") + }; + + ( + aggr.kind, + arguments.clone(), + aggr.kind.get_local_aggregates_kinds(), + ) + }; + + for kind in aggr_kinds { + let local_alias = Rc::new(aggr_local_alias(final_kind, local_alias_index)); + + let signature = AggregateSignature { + kind, + arguments: arguments.clone(), + plan: self, + local_alias: local_alias.clone(), + }; + if let Some(sig) = unique_local_aggregates.get(&signature) { + let aggr: &mut Aggregate = aggrs.get_mut(pos).unwrap(); + aggr.lagg_aliases.insert(kind, sig.local_alias.clone()); + } else { + let aggr = aggrs.get_mut(pos).unwrap(); + + // New aggregate was really added. + local_alias_index += 1; + aggr.lagg_aliases.insert(kind, local_alias.clone()); + unique_local_aggregates.insert(signature); + } + } + } + + type LocalAggregate = (AggregateKind, Vec<NodeId>, Rc<String>); + // Add non-distinct aggregates to local projection. 
+ let local_aggregates: Vec<LocalAggregate> = unique_local_aggregates + .into_iter() + .map(|x| (x.kind, x.arguments.clone(), x.local_alias.clone())) + .collect(); + for (kind, arguments, local_alias) in local_aggregates { + let alias_id = self.create_local_aggregate(kind, &arguments, local_alias.as_str())?; + output_cols.push(alias_id); + } + + Ok(()) + } +} diff --git a/sbroad/sbroad-core/src/ir/distribution/tests.rs b/sbroad/sbroad-core/src/ir/distribution/tests.rs index ad36a4dafb..9ebe6d4ed2 100644 --- a/sbroad/sbroad-core/src/ir/distribution/tests.rs +++ b/sbroad/sbroad-core/src/ir/distribution/tests.rs @@ -66,9 +66,9 @@ fn projection_any_dist_for_expr() { // check explain first insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("count_696"::unsigned))::unsigned -> "col_1") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1") motion [policy: full] - projection (count(("test_space"."id"::unsigned))::unsigned -> "count_696") + projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 diff --git a/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs b/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs index 7e176984d7..5b3ee71b9f 100644 --- a/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs +++ b/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs @@ -58,9 +58,9 @@ fn test_query_explain_4() { let metadata = &RouterRuntimeMock::new(); let mut query = Query::new(metadata, sql, vec![]).unwrap(); insta::assert_snapshot!(query.to_explain().unwrap(), @r#" - projection (sum(("count_596"::unsigned))::unsigned -> "col_1") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1") motion [policy: full] - projection (count((*::integer))::unsigned -> "count_596") + projection (count((*::integer))::unsigned -> "count_1") scan "t2" execution options: sql_vdbe_opcode_max = 45000 @@ -193,10 +193,10 @@ fn test_query_explain_11() { let 
metadata = &RouterRuntimeMock::new(); let mut query = Query::new(metadata, sql, vec![]).unwrap(); insta::assert_snapshot!(query.to_explain().unwrap(), @r#" - projection ("column_3496"::string -> "a", sum(("count_4196"::unsigned))::unsigned -> "col_1") - group by ("column_3496"::string) output: ("column_3496"::string -> "column_3496", "count_4196"::unsigned -> "count_4196") - motion [policy: segment([ref("column_3496")])] - projection ("a"::string -> "column_3496", count(("b"::integer))::unsigned -> "count_4196") + projection ("gr_expr_1"::string -> "a", sum(("count_1"::unsigned))::unsigned -> "col_1") + group by ("gr_expr_1"::string) output: ("gr_expr_1"::string -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("a"::string -> "gr_expr_1", count(("b"::integer))::unsigned -> "count_1") group by ("a"::string) output: ("e"::unsigned -> "e", "f"::unsigned -> "f", "a"::string -> "a", "b"::integer -> "b") join on ROW("e"::unsigned) = ROW("a"::string) scan diff --git a/sbroad/sbroad-core/src/ir/function.rs b/sbroad/sbroad-core/src/ir/function.rs index 79509a861d..bf4c3f66e0 100644 --- a/sbroad/sbroad-core/src/ir/function.rs +++ b/sbroad/sbroad-core/src/ir/function.rs @@ -54,10 +54,6 @@ impl Function { impl Plan { /// Adds a stable function to the plan. - /// - /// # Errors - /// - Function is not stable. - /// - Function is not found in the plan. 
pub fn add_stable_function( &mut self, function: &Function, @@ -82,12 +78,6 @@ impl Plan { } /// Add aggregate function to plan - /// - /// # Errors - /// - Invalid arguments for given aggregate function - /// - /// # Panics - /// - never pub fn add_aggregate_function( &mut self, function: &str, @@ -134,7 +124,7 @@ impl Plan { }; let func_expr = StableFunction { name: function.to_lowercase().to_smolstr(), - func_type: kind.to_type(self, &children)?, + func_type: kind.get_type(self, &children)?, children, feature, is_system: true, diff --git a/sbroad/sbroad-core/src/ir/helpers.rs b/sbroad/sbroad-core/src/ir/helpers.rs index 0445917a80..5eb0dab7f8 100644 --- a/sbroad/sbroad-core/src/ir/helpers.rs +++ b/sbroad/sbroad-core/src/ir/helpers.rs @@ -395,10 +395,8 @@ impl Plan { writeln_with_tabulation(buf, tabulation_number + 1, "Filter")?; self.formatted_arena_node(buf, tabulation_number + 1, *filter)?; } - Relational::GroupBy(GroupBy { - gr_exprs, is_final, .. - }) => { - writeln!(buf, "GroupBy [is_final = {is_final}]")?; + Relational::GroupBy(GroupBy { gr_exprs, .. 
}) => { + writeln!(buf, "GroupBy")?; writeln_with_tabulation(buf, tabulation_number + 1, "Gr_cols:")?; for expr_id in gr_exprs { let expr = self.get_expression_node(*expr_id); diff --git a/sbroad/sbroad-core/src/ir/helpers/tests.rs b/sbroad/sbroad-core/src/ir/helpers/tests.rs index daa4424840..845ff83b06 100644 --- a/sbroad/sbroad-core/src/ir/helpers/tests.rs +++ b/sbroad/sbroad-core/src/ir/helpers/tests.rs @@ -259,7 +259,7 @@ fn simple_aggregation_with_group_by() { [id: 432] expression: Alias [name = bucket_id, child = Reference(Reference { parent: Some(NodeId { offset: 1, arena_type: Arena64 }), targets: None, position: 4, col_type: DerivedType(Some(Unsigned)), asterisk_source: None })] --------------------------------------------- --------------------------------------------- -[id: 364] relation: GroupBy [is_final = false] +[id: 364] relation: GroupBy Gr_cols: Gr_col: Reference(Reference { parent: Some(NodeId { offset: 3, arena_type: Arena64 }), targets: Some([0]), position: 1, col_type: DerivedType(Some(String)), asterisk_source: None }) Children: @@ -280,7 +280,7 @@ fn simple_aggregation_with_group_by() { Output_id: 664 [id: 664] expression: Row [distribution = Some(Any)] List: - [id: 1132] expression: Alias [name = column_596, child = Reference(Reference { parent: Some(NodeId { offset: 3, arena_type: Arena64 }), targets: Some([0]), position: 1, col_type: DerivedType(Some(String)), asterisk_source: None })] + [id: 1132] expression: Alias [name = gr_expr_1, child = Reference(Reference { parent: Some(NodeId { offset: 7, arena_type: Arena64 }), targets: Some([0]), position: 1, col_type: DerivedType(Some(String)), asterisk_source: None })] --------------------------------------------- --------------------------------------------- [id: 0136] relation: Motion [policy = Segment(MotionKey { targets: [Reference(0)] }), alias = None] @@ -289,10 +289,10 @@ fn simple_aggregation_with_group_by() { Output_id: 1064 [id: 1064] expression: Row [distribution = Some(Segment { 
keys: KeySet({Key { positions: [0] }}) })] List: - [id: 1332] expression: Alias [name = column_596, child = Reference(Reference { parent: Some(NodeId { offset: 0, arena_type: Arena136 }), targets: Some([0]), position: 0, col_type: DerivedType(Some(String)), asterisk_source: None })] + [id: 1332] expression: Alias [name = gr_expr_1, child = Reference(Reference { parent: Some(NodeId { offset: 0, arena_type: Arena136 }), targets: Some([0]), position: 0, col_type: DerivedType(Some(String)), asterisk_source: None })] --------------------------------------------- --------------------------------------------- -[id: 964] relation: GroupBy [is_final = true] +[id: 964] relation: GroupBy Gr_cols: Gr_col: Reference(Reference { parent: Some(NodeId { offset: 9, arena_type: Arena64 }), targets: Some([0]), position: 0, col_type: DerivedType(Some(String)), asterisk_source: None }) Children: @@ -300,7 +300,7 @@ fn simple_aggregation_with_group_by() { Output_id: 864 [id: 864] expression: Row [distribution = Some(Segment { keys: KeySet({Key { positions: [0] }}) })] List: - [id: 1232] expression: Alias [name = column_596, child = Reference(Reference { parent: Some(NodeId { offset: 9, arena_type: Arena64 }), targets: Some([0]), position: 0, col_type: DerivedType(Some(String)), asterisk_source: None })] + [id: 1232] expression: Alias [name = gr_expr_1, child = Reference(Reference { parent: Some(NodeId { offset: 9, arena_type: Arena64 }), targets: Some([0]), position: 0, col_type: DerivedType(Some(String)), asterisk_source: None })] --------------------------------------------- --------------------------------------------- [id: 564] relation: Projection diff --git a/sbroad/sbroad-core/src/ir/node.rs b/sbroad/sbroad-core/src/ir/node.rs index 8caa964035..4cbc2b9113 100644 --- a/sbroad/sbroad-core/src/ir/node.rs +++ b/sbroad/sbroad-core/src/ir/node.rs @@ -664,11 +664,14 @@ impl From<Selection> for NodeAligned { #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] pub struct GroupBy 
{ - /// The first child is a relational operator before group by + /// The first child is a + /// * Scan in case it's local GroupBy + /// * Motion with policy Segment in case two stage aggregation was applied + /// + /// Other children are subqueries used under grouping expressions. pub children: Vec<NodeId>, pub gr_exprs: Vec<NodeId>, pub output: NodeId, - pub is_final: bool, } impl From<GroupBy> for NodeAligned { diff --git a/sbroad/sbroad-core/src/ir/node/expression.rs b/sbroad/sbroad-core/src/ir/node/expression.rs index 1f3cabbd0d..f1da48914f 100644 --- a/sbroad/sbroad-core/src/ir/node/expression.rs +++ b/sbroad/sbroad-core/src/ir/node/expression.rs @@ -157,7 +157,7 @@ impl Expression<'_> { #[must_use] pub fn is_aggregate_name(name: &str) -> bool { // currently we support only simple aggregates - AggregateKind::new(name).is_some() + AggregateKind::from_name(name).is_some() } #[must_use] diff --git a/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs b/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs index e1ee9248d2..69899d3582 100644 --- a/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs +++ b/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs @@ -3,132 +3,58 @@ use smol_str::{format_smolstr, ToSmolStr}; use crate::errors::{Entity, SbroadError}; use crate::executor::engine::helpers::to_user; use crate::frontend::sql::ir::SubtreeCloner; -use crate::ir::aggregates::{generate_local_alias_for_aggr, AggregateKind, SimpleAggregate}; +use crate::ir::aggregates::Aggregate; use crate::ir::distribution::Distribution; -use crate::ir::expression::{ColumnPositionMap, Comparator, FunctionFeature, EXPR_HASH_DEPTH}; +use crate::ir::expression::{ColumnPositionMap, Comparator, EXPR_HASH_DEPTH}; use crate::ir::node::expression::Expression; -use crate::ir::node::relational::{MutRelational, Relational}; -use crate::ir::node::{ - Alias, ArenaType, GroupBy, Having, NodeId, Projection, Reference, StableFunction, -}; 
+use crate::ir::node::relational::Relational; +use crate::ir::node::{Alias, ArenaType, GroupBy, Having, NodeId, Projection, Reference}; use crate::ir::transformation::redistribution::{ MotionKey, MotionPolicy, Program, Strategy, Target, }; -use crate::ir::tree::traversal::{BreadthFirst, PostOrderWithFilter, EXPR_CAPACITY}; +use crate::ir::tree::traversal::{PostOrder, PostOrderWithFilter, EXPR_CAPACITY}; use crate::ir::{Node, Plan}; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; -use crate::ir::function::{Behavior, Function}; use crate::ir::helpers::RepeatableState; -use crate::utils::{OrderedMap, OrderedSet}; +use crate::utils::OrderedMap; use std::hash::{Hash, Hasher}; use std::rc::Rc; -const AGGR_CAPACITY: usize = 10; - -/// Helper struct to store metadata about aggregates -#[derive(Clone, Debug)] -struct AggrInfo { - /// id of Relational node in which this aggregate is located. - /// It can be located in `Projection`, `Having`, `OrderBy` - parent_rel: NodeId, - /// id of parent expression of aggregate function, - /// if there is no parent it's `None` - parent_expr: Option<NodeId>, - /// info about what aggregate it is: sum, count, ... - aggr: SimpleAggregate, - /// whether this aggregate was marked distinct in original user query - is_distinct: bool, -} - -/// Helper struct to find aggregates in expressions of finals -struct AggrCollector<'plan> { - /// id of final node in which matches are searched - parent_rel: Option<NodeId>, - /// collected aggregates - infos: Vec<AggrInfo>, - plan: &'plan Plan, -} - /// Helper struct to hold information about /// location of grouping expressions used in /// nodes other than `GroupBy`. 
/// -/// For example grouping expressions can appear -/// in `Projection`, `Having`, `OrderBy` -struct ExpressionLocationIds { - pub parent_expr: Option<NodeId>, - pub expr: NodeId, - pub rel: NodeId, -} - -impl ExpressionLocationIds { - pub fn new(expr_id: NodeId, parent_expr_id: Option<NodeId>, rel_id: NodeId) -> Self { - ExpressionLocationIds { - parent_expr: parent_expr_id, - expr: expr_id, - rel: rel_id, - } - } -} - -/// Helper struct to filter duplicate aggregates in local stage. -/// -/// Consider user query: `select sum(a), avg(a) from t` -/// at local stage we need to compute `sum(a)` only once. -/// -/// This struct contains info needed to compute hash and compare aggregates -/// used at local stage. -struct AggregateSignature<'plan, 'args> { - pub kind: AggregateKind, - /// ids of expressions used as arguments to aggregate - pub arguments: &'args Vec<NodeId>, - pub plan: &'plan Plan, - /// reference to local alias of this local aggregate - pub local_alias: Option<Rc<String>>, -} - -impl AggregateSignature<'_, '_> { - pub fn get_alias(&self) -> Result<Rc<String>, SbroadError> { - let r = self - .local_alias - .as_ref() - .ok_or_else(|| { - SbroadError::Invalid( - Entity::AggregateSignature, - Some("missing local alias".into()), - ) - })? - .clone(); - Ok(r) - } +/// E.g. for query `select 1 + a from t group by a` +/// location for grouping expression `a` will look like +/// { +/// `expr`: id of a under sum expr, +/// `parent_expr`: Some(id of sum expr), +/// `rel`: Projection +/// } +#[derive(Debug, Clone)] +struct ExpressionLocationId { + /// Id of grouping expression. + pub expr_id: NodeId, + /// Id of expression which is a parent of `expr`. + pub parent_expr_id: NodeId, + /// Relational node in which this `expr` is used. 
+ pub rel_id: NodeId, } -impl Hash for AggregateSignature<'_, '_> { - fn hash<H: Hasher>(&self, state: &mut H) { - self.kind.hash(state); - let mut comp = Comparator::new(self.plan); - comp.set_hasher(state); - for arg in self.arguments { - comp.hash_for_expr(*arg, EXPR_HASH_DEPTH); +impl ExpressionLocationId { + pub fn new(expr_id: NodeId, parent_expr_id: NodeId, rel_id: NodeId) -> Self { + ExpressionLocationId { + parent_expr_id, + expr_id, + rel_id, } } } -impl PartialEq<Self> for AggregateSignature<'_, '_> { - fn eq(&self, other: &Self) -> bool { - let comparator = Comparator::new(self.plan); - self.kind == other.kind - && self - .arguments - .iter() - .zip(other.arguments.iter()) - .all(|(l, r)| comparator.are_subtrees_equal(*l, *r).unwrap_or(false)) - } -} - -impl Eq for AggregateSignature<'_, '_> {} - +/// Id of grouping expression united with reference to plan +/// for the ease of expressions comparison (see +/// implementation of `Hash` and `PartialEq` traits). #[derive(Debug, Clone)] struct GroupingExpression<'plan> { pub id: NodeId, @@ -156,65 +82,7 @@ impl PartialEq for GroupingExpression<'_> { } } -impl Eq for GroupingExpression<'_> {} - -impl<'plan> AggrCollector<'plan> { - pub fn with_capacity(plan: &'plan Plan, capacity: usize) -> AggrCollector<'plan> { - AggrCollector { - infos: Vec::with_capacity(capacity), - parent_rel: None, - plan, - } - } - - pub fn take_aggregates(&mut self) -> Vec<AggrInfo> { - std::mem::take(&mut self.infos) - } - - /// Collect aggregates in internal field by traversing expression tree `top` - /// - /// # Arguments - /// * `top` - id of expression root in which to look for aggregates - /// * `parent_rel` - id of parent relational node, where `top` is located. 
It is used to - /// create `AggrInfo` - /// - /// # Errors - /// - invalid expression tree pointed by `top` - pub fn collect_aggregates( - &mut self, - top: NodeId, - parent_rel: NodeId, - ) -> Result<(), SbroadError> { - self.parent_rel = Some(parent_rel); - self.find(top, None)?; - self.parent_rel = None; - Ok(()) - } - - fn find(&mut self, current: NodeId, parent: Option<NodeId>) -> Result<(), SbroadError> { - let expr = self.plan.get_expression_node(current)?; - if let Expression::StableFunction(StableFunction { name, feature, .. }) = expr { - let is_distinct = matches!(feature, Some(FunctionFeature::Distinct)); - if let Some(aggr) = SimpleAggregate::new(name, current) { - let Some(parent_rel) = self.parent_rel else { - return Err(SbroadError::Invalid(Entity::AggregateCollector, None)); - }; - let info = AggrInfo { - parent_rel, - parent_expr: parent, - aggr, - is_distinct, - }; - self.infos.push(info); - return Ok(()); - }; - } - for child in self.plan.nodes.expr_iter(current, false) { - self.find(child, Some(current))?; - } - Ok(()) - } -} +impl<'plan> Eq for GroupingExpression<'plan> {} /// Maps id of `GroupBy` expression used in `GroupBy` (from local stage) /// to list of locations where this expression is used in other relational @@ -229,11 +97,12 @@ impl<'plan> AggrCollector<'plan> { /// In case there is a reference (or expression containing it) in the final relational operator /// that doesn't correspond to any GroupBy expression, an error should have been thrown on the /// stage of `collect_grouping_expressions`. -type GroupbyExpressionsMap = HashMap<NodeId, Vec<ExpressionLocationIds>>; +type GroupbyExpressionsMap = HashMap<NodeId, Vec<ExpressionLocationId>>; + /// Maps id of `GroupBy` expression used in `GroupBy` (from local stage) /// to corresponding local alias used in local Projection. 
Note: /// this map does not contain mappings between grouping expressions from -/// distinct aggregates (it is stored in corresponding `AggrInfo` for that +/// distinct aggregates (it is stored in corresponding `Aggregate` for that /// aggregate) /// /// For example: @@ -241,27 +110,30 @@ type GroupbyExpressionsMap = HashMap<NodeId, Vec<ExpressionLocationIds>>; /// map query: `select a as l1, b group by a, b` /// Then this map will map id of `a` to `l1` type LocalAliasesMap = HashMap<NodeId, Rc<String>>; -type LocalAggrInfo = (AggregateKind, Vec<NodeId>, Rc<String>); /// Helper struct to map expressions used in `GroupBy` to /// expressions used in some other node (`Projection`, `Having`, `OrderBy`) struct ExpressionMapper<'plan> { /// List of expressions ids of `GroupBy` gr_exprs: &'plan Vec<NodeId>, - map: GroupbyExpressionsMap, + map: &'plan mut GroupbyExpressionsMap, plan: &'plan Plan, /// Id of relational node (`Projection`, `Having`, `OrderBy`) - node_id: Option<NodeId>, + rel_id: NodeId, } impl<'plan> ExpressionMapper<'plan> { - fn new(gr_expressions: &'plan Vec<NodeId>, plan: &'plan Plan) -> ExpressionMapper<'plan> { - let map: GroupbyExpressionsMap = HashMap::new(); + fn new( + gr_exprs: &'plan Vec<NodeId>, + plan: &'plan Plan, + rel_id: NodeId, + map: &'plan mut GroupbyExpressionsMap, + ) -> ExpressionMapper<'plan> { ExpressionMapper { - gr_exprs: gr_expressions, + gr_exprs, map, plan, - node_id: None, + rel_id, } } @@ -269,37 +141,21 @@ impl<'plan> ExpressionMapper<'plan> { /// to find subexpressions that match expressions located in `GroupBy`, /// when match is found it is stored in map passed to [`ExpressionMapper`]'s /// constructor. 
- /// - /// # Arguments - /// * `expr_root` - expression id from which matching will start - /// * `node_id` - id of relational node (`Having`, `Projection`, `OrderBy`), - /// where expression pointed by `expr_root` is located - /// - /// # Errors - /// - invalid references in any expression (`GroupBy`'s or node's one) - /// - invalid query: node expression contains references that are not - /// found in `GroupBy` expression. The reason is that user specified expression in - /// node that does not match any expression in `GroupBy` - fn find_matches(&mut self, expr_root: NodeId, node_id: NodeId) -> Result<(), SbroadError> { - self.node_id = Some(node_id); + fn find_matches(&mut self, expr_root: NodeId) -> Result<(), SbroadError> { self.find(expr_root, None)?; - self.node_id = None; Ok(()) } /// Helper function for `find_matches` which compares current node to `GroupBy` expressions /// and if no match is found recursively calls itself. - fn find(&mut self, current: NodeId, parent: Option<NodeId>) -> Result<(), SbroadError> { - let Some(node_id) = self.node_id else { - return Err(SbroadError::Invalid(Entity::ExpressionMapper, None)); - }; + fn find(&mut self, current: NodeId, parent_expr: Option<NodeId>) -> Result<(), SbroadError> { let is_ref = matches!( self.plan.get_expression_node(current), Ok(Expression::Reference(_)) ); let is_sq_ref = is_ref && self.plan.is_additional_child_of_rel( - node_id, + self.rel_id, self.plan.get_relational_from_reference_node(current)?, )?; // Because subqueries are replaced with References, we must not @@ -318,7 +174,13 @@ impl<'plan> ExpressionMapper<'plan> { }) .copied() { - let location = ExpressionLocationIds::new(current, parent, node_id); + let parent_expr = parent_expr.unwrap_or_else(|| { + panic!( + "parent expression for grouping expression under {:?} rel node should be found", + self.rel_id + ) + }); + let location = ExpressionLocationId::new(current, parent_expr, self.rel_id); if let Some(v) = self.map.get_mut(&gr_expr) { 
v.push(location); } else { @@ -349,33 +211,76 @@ impl<'plan> ExpressionMapper<'plan> { } Ok(()) } +} + +fn grouping_expr_local_alias(index: usize) -> Rc<String> { + Rc::new(format!("gr_expr_{index}")) +} - pub fn get_matches(&mut self) -> GroupbyExpressionsMap { - std::mem::take(&mut self.map) +/// Capacity for the vecs/maps of grouping expressions we expect +/// to extract from nodes like Projection, GroupBy and Having. +const GR_EXPR_CAPACITY: usize = 5; + +/// Info helpful to generate final GroupBy node (on Reduce stage). +struct GroupByReduceInfo { + local_aliases_map: LocalAliasesMap, + /// Positions of grouping expressions added to the output of local + /// Projection. Used for generating MotionKey for segmented motion. + /// That's the reason we don't count grouping expressions came from + /// distinct aggregates here as they don't influence distribution. + grouping_positions: Vec<usize>, +} + +impl GroupByReduceInfo { + fn new() -> Self { + Self { + local_aliases_map: HashMap::with_capacity(GR_EXPR_CAPACITY), + grouping_positions: Vec::with_capacity(GR_EXPR_CAPACITY), + } } } -impl Plan { - #[allow(unreachable_code)] - fn generate_local_alias(id: NodeId) -> String { - #[cfg(feature = "mock")] - { - return format!("column_{id}"); +/// Info about grouping expressions that would be used under local Projection +/// * Their aliases (gr_expr_<INDEX>) +/// * Relation id from which this grouping expression came from (it may +/// be user defined GroupBy or distinct aggregates under Projection or Having) +#[derive(Clone)] +struct LocalProjectionGroupingExprInfo { + local_alias: Rc<String>, + parent_rel_id: NodeId, +} + +/// Info about both local and final GroupBy nodes. Such info is not +/// generated in case query doesn't require GroupBy nodes. E.g. `select sum(a) from t` +/// will require only two additional nodes: local Projection and a Motion node (see +/// logic under `add_two_stage_aggregation`). 
+struct GroupByInfo { + id: NodeId, + grouping_exprs: Vec<NodeId>, + gr_exprs_map: GroupbyExpressionsMap, + /// Map of { grouping_expr under local GroupBy -> its alias () + parent rel_id }. + grouping_expr_to_alias_map: + OrderedMap<NodeId, LocalProjectionGroupingExprInfo, RepeatableState>, + reduce_info: GroupByReduceInfo, +} + +impl GroupByInfo { + fn new(id: NodeId) -> Self { + Self { + id, + grouping_exprs: Vec::with_capacity(GR_EXPR_CAPACITY), + gr_exprs_map: HashMap::with_capacity(GR_EXPR_CAPACITY), + grouping_expr_to_alias_map: OrderedMap::with_hasher(RepeatableState), + reduce_info: GroupByReduceInfo::new(), } - format!("{}_{id}", uuid::Uuid::new_v4().as_simple()) } +} +impl Plan { /// Used to create a `GroupBy` IR node from AST. /// The added `GroupBy` node is local - meaning /// that it is part of local stage in 2-stage /// aggregation. For more info, see `add_two_stage_aggregation`. - /// - /// # Arguments - /// * `children` - plan's ids of `group by` children from AST - /// - /// # Errors - /// - invalid children count - /// - failed to create output for `GroupBy` pub fn add_groupby_from_ast(&mut self, children: &[NodeId]) -> Result<NodeId, SbroadError> { let Some((first_child, other)) = children.split_first() else { return Err(SbroadError::UnexpectedNumberOfValues( @@ -383,98 +288,34 @@ impl Plan { )); }; - let groupby_id = self.add_groupby(*first_child, other, false, None)?; + let groupby_id = self.add_groupby(*first_child, other, None)?; Ok(groupby_id) } - /// Helper function to add `group by` to IR - /// - /// # Errors - /// - `child_id` - invalid `Relational` node - /// - `grouping_exprs` - contains non-expr id + /// Helper function to add `group by` to IR. 
pub fn add_groupby( &mut self, child_id: NodeId, grouping_exprs: &[NodeId], - is_final: bool, - expr_parent: Option<NodeId>, + prev_refs_parent_id: Option<NodeId>, ) -> Result<NodeId, SbroadError> { let final_output = self.add_row_for_output(child_id, &[], true, None)?; let groupby = GroupBy { children: [child_id].to_vec(), gr_exprs: grouping_exprs.to_vec(), output: final_output, - is_final, }; let groupby_id = self.add_relational(groupby.into())?; self.replace_parent_in_subtree(final_output, None, Some(groupby_id))?; for expr in grouping_exprs { - self.replace_parent_in_subtree(*expr, expr_parent, Some(groupby_id))?; + self.replace_parent_in_subtree(*expr, prev_refs_parent_id, Some(groupby_id))?; } Ok(groupby_id) } - /// Collect information about aggregates - /// - /// Aggregates can appear in `Projection`, `Having`, `OrderBy` - /// - /// # Arguments - /// [`finals`] - ids of nodes in final (reduce stage) before adding two stage aggregation. - /// It may contain ids of `Projection`, `Having` or `NamedWindows`. - /// Note: final `GroupBy` is not present because it will be added later in 2-stage pipeline. - fn collect_aggregates(&self, finals: &Vec<NodeId>) -> Result<Vec<AggrInfo>, SbroadError> { - let mut collector = AggrCollector::with_capacity(self, AGGR_CAPACITY); - for node_id in finals { - let node = self.get_relation_node(*node_id)?; - match node { - Relational::Projection(Projection { output, .. }) => { - for col in self.get_row_list(*output)? { - collector.collect_aggregates(*col, *node_id)?; - } - } - Relational::NamedWindows(_) => { - unreachable!("NamedWindows node should not be present in finals"); - } - Relational::Having(Having { filter, .. 
}) => { - collector.collect_aggregates(*filter, *node_id)?; - } - _ => { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!( - "unexpected relational node ({node_id:?}): {node:?}" - )), - )) - } - } - } - - let aggr_infos = collector.take_aggregates(); - self.validate_aggregates(&aggr_infos)?; - - Ok(aggr_infos) - } - - /// Validates expressions used in aggregates - /// - /// Currently we only check that there is no aggregates inside aggregates - fn validate_aggregates(&self, aggr_infos: &Vec<AggrInfo>) -> Result<(), SbroadError> { - for info in aggr_infos { - let top = info.aggr.fun_id; - if self.contains_aggregates(top, false)? { - return Err(SbroadError::Invalid( - Entity::Query, - Some("aggregate function inside aggregate function is not allowed.".into()), - )); - } - } - - Ok(()) - } - /// Get ids of nodes in Reduce stage (finals) and id of the top node in Map stage. /// /// Finals are nodes in Reduce stage without final `GroupBy`. @@ -494,7 +335,7 @@ impl Plan { &self, final_proj_id: NodeId, ) -> Result<(Vec<NodeId>, NodeId), SbroadError> { - let mut finals: Vec<NodeId> = vec![]; + let mut finals: Vec<NodeId> = Vec::with_capacity(3); let get_first_child = |rel_id: NodeId| -> Result<NodeId, SbroadError> { let c = *self .get_relational_children(rel_id)? @@ -526,208 +367,265 @@ impl Plan { )) } - /// Collects information about grouping expressions for future use. - /// In case there is a `Projection` with `distinct` modifier and - /// no `GroupBy` node, a `GroupBy` node with projection expressions - /// will be created. - /// This function also does all the validation of incorrect usage of - /// expressions used outside of aggregate functions. - /// - /// # Returns - /// - id of `GroupBy` node if is was created or `upper` otherwise - /// - list of ids of expressions used in `GroupBy`. Duplicate expressions are removed. 
- /// - mapping between `GroupBy` expressions and corresponding expressions in final nodes - /// (`Projection`, `Having`, `GroupBy`, `OrderBy`). - /// - /// # Arguments - /// * `upper` - id of the top node in reduce stage, if `GroupBy` is present in the query - /// the top node in Reduce stage will be `GroupBy`. - /// * `finals` - ids of nodes in final stage starting from `Projection` - /// - /// # Errors - /// - invalid references in `GroupBy` - /// - invalid query with `GroupBy`: some expression in some final node wasn't matched to - /// some `GroupBy` expression - /// - invalid query without `GroupBy` and with aggregates: there are column references outside - /// aggregate functions - /// - invalid query with `Having`: in case there's no `GroupBy`, `Having` may contain - /// only expressions with constants and aggregates. References outside of aggregate functions - /// are illegal. - #[allow(clippy::too_many_lines)] - fn collect_grouping_expressions( + /// In case we deal with a query containing "distinct" qualifier and + /// not containing aggregates or user defined GroupBy, we have to add + /// GroupBy node for fulfill "distinct" semantics. + fn add_group_by_for_distinct( &mut self, + proj_id: NodeId, upper: NodeId, - finals: &Vec<NodeId>, - has_aggregates: bool, - ) -> Result<(NodeId, Vec<NodeId>, GroupbyExpressionsMap), SbroadError> { - let mut grouping_expr = vec![]; - let mut gr_expr_map: GroupbyExpressionsMap = HashMap::new(); - let mut upper = upper; - - let mut has_groupby = matches!(self.get_relation_node(upper)?, Relational::GroupBy(_)); - - if !has_groupby && !has_aggregates { - if let Some(proj_id) = finals.first() { - if let Relational::Projection(Projection { - is_distinct, - output, - .. - }) = self.get_relation_node(*proj_id)? 
- { - if *is_distinct { - let proj_cols_len = self.get_row_list(*output)?.len(); - let mut grouping_exprs: Vec<NodeId> = Vec::with_capacity(proj_cols_len); - for i in 0..proj_cols_len { - let aliased_col = self.get_proj_col(*proj_id, i)?; - let proj_col_id = if let Expression::Alias(Alias { child, .. }) = - self.get_expression_node(aliased_col)? - { - *child - } else { - aliased_col - }; - // Copy expression from Projection to GroupBy. - let col = SubtreeCloner::clone_subtree(self, proj_col_id)?; - grouping_exprs.push(col); - } - upper = self.add_groupby(upper, &grouping_exprs, false, Some(*proj_id))?; + ) -> Result<Option<NodeId>, SbroadError> { + let Relational::Projection(Projection { + is_distinct, + output, + .. + }) = self.get_relation_node(proj_id)? + else { + unreachable!("Projection expected as a top final node") + }; - has_groupby = true; - } - } + let groupby_id = if *is_distinct { + let proj_cols_len = self.get_row_list(*output)?.len(); + let mut grouping_exprs: Vec<NodeId> = Vec::with_capacity(proj_cols_len); + for i in 0..proj_cols_len { + let aliased_col = self.get_proj_col(proj_id, i)?; + let proj_col_id = if let Expression::Alias(Alias { child, .. }) = + self.get_expression_node(aliased_col)? + { + *child + } else { + aliased_col + }; + // Copy expression from Projection to GroupBy. 
+ let col = SubtreeCloner::clone_subtree(self, proj_col_id)?; + grouping_exprs.push(col); } - } + let groupby_id = self.add_groupby(upper, &grouping_exprs, Some(proj_id))?; + Some(groupby_id) + } else { + None + }; + Ok(groupby_id) + } - if has_groupby { - let old_gr_exprs = self.get_grouping_exprs(upper)?; - // remove duplicate expressions - let mut unique_grouping_exprs: OrderedSet<GroupingExpression, _> = - OrderedSet::with_capacity_and_hasher(old_gr_exprs.len(), RepeatableState); - for gr_expr in old_gr_exprs { - unique_grouping_exprs.insert(GroupingExpression::new(*gr_expr, self)); - } - let grouping_exprs: Vec<NodeId> = unique_grouping_exprs.iter().map(|e| e.id).collect(); - grouping_expr.extend(grouping_exprs.iter()); - self.set_grouping_exprs(upper, grouping_exprs)?; - - let mut mapper = ExpressionMapper::new(&grouping_expr, self); - for node_id in finals { - match self.get_relation_node(*node_id)? { - Relational::Projection(Projection { output, .. }) => { - for col in self.get_row_list(*output)? { - mapper.find_matches(*col, *node_id)?; - } - } - Relational::NamedWindows(_) => { - unreachable!("NamedWindows node should not be present in finals"); - } - Relational::Having(Having { filter, .. }) => { - mapper.find_matches(*filter, *node_id)?; + /// Fill grouping expression map (see comments next to + /// `GroupbyExpressionsMap` definition). + #[allow(clippy::too_many_lines)] + fn fill_gr_exprs_map( + &mut self, + finals: &Vec<NodeId>, + groupby_info: &mut GroupByInfo, + ) -> Result<(), SbroadError> { + for rel_id in finals { + let final_node = self.get_relation_node(*rel_id)?; + match final_node { + Relational::Projection(Projection { output, .. }) => { + let mut mapper = ExpressionMapper::new( + &groupby_info.grouping_exprs, + self, + *rel_id, + &mut groupby_info.gr_exprs_map, + ); + for col in self.get_row_list(*output)? { + mapper.find_matches(*col)?; } - _ => {} + } + Relational::Having(Having { filter, .. 
}) => { + let mut mapper = ExpressionMapper::new( + &groupby_info.grouping_exprs, + self, + *rel_id, + &mut groupby_info.gr_exprs_map, + ); + mapper.find_matches(*filter)?; + } + _ => { + unreachable!("{final_node:?} node should not be present in finals"); } } - gr_expr_map = mapper.get_matches(); } - if has_aggregates && !has_groupby { - // check that all column references are inside aggregate functions - for id in finals { - let node = self.get_relation_node(*id)?; - match node { - Relational::Projection(Projection { output, .. }) => { - for col in self.get_row_list(*output)? { - let filter = |node_id: NodeId| -> bool { - matches!( - self.get_node(node_id), - Ok(Node::Expression(Expression::Reference(_))) - ) - }; - let mut dfs = PostOrderWithFilter::with_capacity( - |x| self.nodes.aggregate_iter(x, false), - EXPR_CAPACITY, - Box::new(filter), - ); - dfs.populate_nodes(*col); - let nodes = dfs.take_nodes(); - for level_node in nodes { - let id = level_node.1; - let n = self.get_expression_node(id)?; - if let Expression::Reference(_) = n { - let alias = match self.get_alias_from_reference_node(&n) { - Ok(v) => v.to_smolstr(), - Err(e) => e.to_smolstr(), - }; - return Err(SbroadError::Invalid(Entity::Query, - Some(format_smolstr!("found column reference ({}) outside aggregate function", to_user(alias))))); - } - } - } - } - Relational::Having(Having { filter, .. }) => { - let mut bfs = BreadthFirst::with_capacity( + Ok(()) + } + + /// In case query doesn't contain user defined GroupBy, check that all + /// column references under `finals` are inside aggregate functions. + fn check_refs_out_of_aggregates(&self, finals: &Vec<NodeId>) -> Result<(), SbroadError> { + for id in finals { + let node = self.get_relation_node(*id)?; + match node { + Relational::Projection(Projection { output, .. }) => { + for col in self.get_row_list(*output)? 
{ + let filter = |node_id: NodeId| -> bool { + matches!( + self.get_node(node_id), + Ok(Node::Expression(Expression::Reference(_))) + ) + }; + let mut dfs = PostOrderWithFilter::with_capacity( |x| self.nodes.aggregate_iter(x, false), EXPR_CAPACITY, - EXPR_CAPACITY, + Box::new(filter), ); - bfs.populate_nodes(*filter); - let nodes = bfs.take_nodes(); + dfs.populate_nodes(*col); + let nodes = dfs.take_nodes(); for level_node in nodes { let id = level_node.1; - if let Expression::Reference(_) = self.get_expression_node(id)? { + let n = self.get_expression_node(id)?; + if let Expression::Reference(_) = n { + let alias = match self.get_alias_from_reference_node(&n) { + Ok(v) => v.to_smolstr(), + Err(e) => e.to_smolstr(), + }; return Err(SbroadError::Invalid( Entity::Query, - Some("HAVING argument must appear in the GROUP BY clause or be used in an aggregate function".into()) + Some(format_smolstr!( + "found column reference ({}) outside aggregate function", + to_user(alias) + )), )); } } } - _ => {} } + Relational::Having(Having { filter, .. }) => { + let mut dfs = PostOrder::with_capacity( + |x| self.nodes.aggregate_iter(x, false), + EXPR_CAPACITY, + ); + dfs.populate_nodes(*filter); + let nodes = dfs.take_nodes(); + for level_node in nodes { + let id = level_node.1; + if let Expression::Reference(_) = self.get_expression_node(id)? { + return Err(SbroadError::Invalid( + Entity::Query, + Some("HAVING argument must appear in the GROUP BY clause or be used in an aggregate function".into()) + )); + } + } + } + _ => {} } } + Ok(()) + } - Ok((upper, grouping_expr, gr_expr_map)) + /// Check for GroupBy on bucket_id column. + /// In that case GroupBy can be done locally. + fn check_bucket_id_under_group_by( + &self, + grouping_exprs: &Vec<NodeId>, + ) -> Result<bool, SbroadError> { + for expr_id in grouping_exprs { + let Expression::Reference(Reference { position, .. }) = + self.get_expression_node(*expr_id)? 
+ else { + continue; + }; + let child_id = self.get_relational_from_reference_node(*expr_id)?; + let mut context = self.context_mut(); + if let Some(shard_positions) = context.get_shard_columns_positions(child_id, self)? { + if shard_positions[0] == Some(*position) || shard_positions[1] == Some(*position) { + return Ok(true); + } + } + } + Ok(false) } - /// Add expressions used as arguments to distinct aggregates to `GroupBy` in reduce stage - /// - /// E.g: For query below, this func should add b*b to reduce `GroupBy` - /// `select a, sum(distinct b*b), count(c) from t group by a` - /// Map: `select a as l1, b*b as l2, count(c) as l3 from t group by a, b` - /// Reduce: `select l1, sum(distinct l2), sum(l3) from tmp_space group by l1` - fn add_distinct_aggregates_to_local_groupby( + /// In case we have distinct aggregates like `count(distinct a)` they result + /// in adding its argument expressions (expression a for the case above) under + /// local GroupBy node. + fn collect_grouping_exprs_from_distinct_aggrs<'aggr>( + &self, + aggrs: &'aggr mut [Aggregate], + ) -> Result<Vec<(NodeId, &'aggr mut Aggregate)>, SbroadError> { + let mut res = Vec::with_capacity(aggrs.len()); + for aggr in aggrs.iter_mut().filter(|x| x.is_distinct) { + let arg: NodeId = *self + .nodes + .expr_iter(aggr.fun_id, false) + .collect::<Vec<NodeId>>() + .first() + .expect("Number of args for aggregate should have been already checked"); + res.push((arg, aggr)); + } + Ok(res) + } + + /// Adds grouping expressions to columns of local projection. + fn add_grouping_exprs( &mut self, - upper: NodeId, - additional_grouping_exprs: Vec<NodeId>, - ) -> Result<NodeId, SbroadError> { - let mut local_proj_child_id = upper; - if !additional_grouping_exprs.is_empty() { - if let MutRelational::GroupBy(GroupBy { gr_exprs, .. }) = - self.get_mut_relation_node(local_proj_child_id)? 
- { - gr_exprs.extend(additional_grouping_exprs); - } else { - local_proj_child_id = - self.add_groupby(upper, &additional_grouping_exprs, true, None)?; - self.set_distribution(self.get_relational_output(local_proj_child_id)?)?; - } + groupby_info: &mut GroupByInfo, + output_cols: &mut Vec<NodeId>, + ) -> Result<(), SbroadError> { + // Map of { grouping_expr_alias -> proj_output_position }. + let mut alias_to_pos: HashMap<Rc<String>, usize> = HashMap::with_capacity(EXPR_CAPACITY); + // Add grouping expressions to local projection. + for (pos, (gr_expr, info)) in groupby_info.grouping_expr_to_alias_map.iter().enumerate() { + let local_alias = &info.local_alias; + let new_gr_expr = SubtreeCloner::clone_subtree(self, *gr_expr)?; + let new_alias = self.nodes.add_alias(local_alias, new_gr_expr)?; + output_cols.push(new_alias); + alias_to_pos.insert(local_alias.clone(), pos); } - Ok(local_proj_child_id) + // Note: we need to iterate only over grouping expressions that were present + // in original user query here. We must not use the grouping expressions + // that come from distinct aggregates. This is because they are handled separately: + // local aliases map is needed only for GroupBy expressions in the original query and + // grouping positions are used to create a Motion later, which should take into account + // only positions from GroupBy expressions in the original user query. 
+        for expr_id in &groupby_info.grouping_exprs {
+            let local_info = groupby_info
+                .grouping_expr_to_alias_map
+                .get(expr_id)
+                .expect("grouping expressions map should contain given expr_id");
+            let local_alias = local_info.local_alias.clone();
+            groupby_info
+                .reduce_info
+                .local_aliases_map
+                .insert(*expr_id, local_alias.clone());
+            let pos = alias_to_pos
+                .get(&local_alias)
+                .expect("alias map should contain given local alias");
+            groupby_info.reduce_info.grouping_positions.push(*pos);
+        }
+
+        Ok(())
+    }
+
+    /// Creates columns for local projection
+    ///
+    /// local projection contains groupby columns + local aggregates,
+    /// this function removes duplicates among them and creates the list for output
+    /// `Row` for local projection.
+    ///
+    /// In case we have distinct aggregates and no groupby in original query,
+    /// local `GroupBy` node will be created.
+    fn create_columns_for_local_proj(
+        &mut self,
+        aggrs: &mut [Aggregate],
+        groupby_info: &mut Option<GroupByInfo>,
+    ) -> Result<Vec<NodeId>, SbroadError> {
+        let mut output_cols: Vec<NodeId> = vec![];
+
+        if let Some(groupby_info) = groupby_info.as_mut() {
+            self.add_grouping_exprs(groupby_info, &mut output_cols)?;
+        };
+        self.add_local_aggregates(aggrs, &mut output_cols)?;
+
+        Ok(output_cols)
+    }
 
     /// Create Projection node for Map(local) stage of 2-stage aggregation
     ///
     /// # Arguments
     ///
-    /// * `child_id` - id of child for Projection node to be created.
-    /// * `aggr_infos` - vector of metadata for each aggregate function that was found in final
+    /// * `upper_id` - id of child for Projection node to be created.
+    /// * `aggrs` - vector of metadata for each aggregate function that was found in final
     /// projection. Each info specifies what kind of aggregate it is (sum, avg, etc) and location
     /// in final projection.
-    /// * `grouping_exprs` - ids of grouping expressions from local `GroupBy`, empty if there is
-    /// no `GroupBy` in original query.
- /// * `finals` - ids of nodes from final stage, starting from `Projection`. - /// Final stage may contain `Projection`, `Limit`, `OrderBy`, `Having` nodes. /// /// Local Projection is created by creating columns for grouping expressions and columns /// for local aggregates. If there is no `GroupBy` in the original query then `child_id` refers @@ -768,26 +666,20 @@ impl Plan { /// ``` /// The same logic must be applied to any node in final stage of 2-stage aggregation: /// `Having`, `GroupBy`, `OrderBy`. See [`add_two_stage_aggregation`] for more details. - /// - /// # Returns - /// - id of local `Projection` that was created. - /// - vector of positions by which `GroupBy` is done. Positions are relative to local `Projection` - /// output. - /// - map between `GroupBy` expression and corresponding local alias. fn add_local_projection( &mut self, - child_id: NodeId, - aggr_infos: &mut Vec<AggrInfo>, - grouping_exprs: &[NodeId], - ) -> Result<(NodeId, Vec<usize>, LocalAliasesMap), SbroadError> { - let (child_id, proj_output_cols, groupby_local_aliases, grouping_positions) = - self.create_columns_for_local_proj(aggr_infos, child_id, grouping_exprs)?; - let proj_output = self.nodes.add_row(proj_output_cols, None); + upper_id: NodeId, + aggrs: &mut Vec<Aggregate>, + groupby_info: &mut Option<GroupByInfo>, + ) -> Result<NodeId, SbroadError> { + let proj_output_cols = self.create_columns_for_local_proj(aggrs, groupby_info)?; + let proj_output: NodeId = self.nodes.add_row(proj_output_cols, None); let ref_rel_nodes = self.get_relational_nodes_from_row(proj_output)?; - let mut children = vec![child_id]; + let mut children = vec![upper_id]; + // Handle subqueries. for ref_rel_node_id in ref_rel_nodes { let rel_node = self.get_relation_node(ref_rel_node_id)?; if matches!(rel_node, Relational::ScanSubQuery { .. 
}) @@ -815,312 +707,47 @@ impl Plan { }; let proj_id = self.add_relational(proj.into())?; - for info in aggr_infos { - // We take expressions inside aggregate functions from Final projection, - // so we need to update parent - self.replace_parent_in_subtree(info.aggr.fun_id, Some(info.parent_rel), Some(proj_id))?; + // We take expressions inside aggregate functions from Final projection, + // so we need to update parent. + for aggr in aggrs { + self.replace_parent_in_subtree(aggr.fun_id, Some(aggr.parent_rel), Some(proj_id))?; } - self.set_distribution(proj_output)?; + if let Some(groupby_info) = groupby_info.as_mut() { + let local_projection_output = self.get_row_list(proj_output)?.clone(); - Ok((proj_id, grouping_positions, groupby_local_aliases)) - } - - fn create_local_aggregate( - &mut self, - kind: AggregateKind, - arguments: &[NodeId], - local_alias: &str, - ) -> Result<NodeId, SbroadError> { - let fun: Function = Function { - name: kind.to_smolstr(), - behavior: Behavior::Stable, - func_type: kind.to_type(self, arguments)?, - is_system: true, - }; - // We can reuse aggregate expression between local aggregates, because - // all local aggregates are located inside the same motion subtree and we - // assume that each local aggregate does not need to modify its expression - let local_fun_id = self.add_stable_function(&fun, arguments.to_vec(), None)?; - let alias_id = self.nodes.add_alias(local_alias, local_fun_id)?; - Ok(alias_id) - } - - /// Creates columns for local projection - /// - /// local projection contains groupby columns + local aggregates, - /// this function removes duplicated among them and creates the list for output - /// `Row` for local projection. - /// - /// In case we have distinct aggregates and no groupby in original query, - /// local `GroupBy` node will created. - /// - /// # Returns - /// - id of local Projection child. 
- /// - created list of columns - /// - mapping between `GroupBy` expressions and local aliases - /// - grouping positions: positions of columns by which `GroupBy` is done - fn create_columns_for_local_proj( - &mut self, - aggr_infos: &mut [AggrInfo], - upper_id: NodeId, - grouping_exprs: &[NodeId], - ) -> Result<(NodeId, Vec<NodeId>, LocalAliasesMap, Vec<usize>), SbroadError> { - let mut output_cols: Vec<NodeId> = vec![]; - let (local_aliases, child_id, grouping_positions) = - self.add_grouping_exprs(aggr_infos, upper_id, grouping_exprs, &mut output_cols)?; - self.add_local_aggregates(aggr_infos, &mut output_cols)?; - - Ok((child_id, output_cols, local_aliases, grouping_positions)) - } - - /// Adds grouping expressions to columns of local projection - /// - /// # Arguments - /// * `aggr_infos` - list of metadata info for each aggregate - /// * `upper_id` - first node in local stage, if `GroupBy` was - /// present in the original user query, then it is the id of that - /// `GroupBy` - /// * `grouping_exprs` - ids of grouping expressions from local - /// `GroupBy`. It is assumed that there are no duplicate expressions - /// among them. - /// * `output_cols` - list of projection columns, where to push grouping - /// expressions. - /// - /// # Returns - /// - map between grouping expression id and corresponding local alias - /// - id of a Projection child, in case there are distinct aggregates and - /// no local `GroupBy` node, this node will be created - /// - list of positions in projection columns by which `GroupBy` is done. These - /// positions are later used to create Motion node and they include only positions - /// from original `GroupBy`. Grouping expressions from distinct aggregates are not - /// included in this list as they shouldn't be used for Motion node. 
- fn add_grouping_exprs( - &mut self, - aggr_infos: &mut [AggrInfo], - upper_id: NodeId, - grouping_exprs: &[NodeId], - output_cols: &mut Vec<NodeId>, - ) -> Result<(LocalAliasesMap, NodeId, Vec<usize>), SbroadError> { - let mut unique_grouping_exprs_for_local_stage_full: OrderedMap< - GroupingExpression, - Rc<String>, - RepeatableState, - > = OrderedMap::with_hasher(RepeatableState); - for gr_expr in grouping_exprs.iter() { - unique_grouping_exprs_for_local_stage_full.insert( - GroupingExpression::new(*gr_expr, self), - Rc::new(Self::generate_local_alias(*gr_expr)), - ); - } - - // add grouping expressions found from distinct aggregates to local groupby - let mut grouping_exprs_from_aggregates: Vec<NodeId> = vec![]; - for info in aggr_infos.iter_mut().filter(|x| x.is_distinct) { - let argument = { - let args = self - .nodes - .expr_iter(info.aggr.fun_id, false) - .collect::<Vec<NodeId>>(); - if args.len() > 1 && !matches!(info.aggr.kind, AggregateKind::GRCONCAT) { - return Err(SbroadError::UnexpectedNumberOfValues(format_smolstr!( - "aggregate ({info:?}) have more than one argument" - ))); - } - *args.first().ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues(format_smolstr!( - "Aggregate function has no children: {info:?}" - )) - })? 
- }; - let expr = GroupingExpression::new(argument, self); - if let Some(local_alias) = unique_grouping_exprs_for_local_stage_full.get(&expr) { - info.aggr - .lagg_alias - .insert(info.aggr.kind, local_alias.clone()); - } else { - grouping_exprs_from_aggregates.push(argument); - let local_alias = Rc::new(Self::generate_local_alias(argument)); - unique_grouping_exprs_for_local_stage_full.insert(expr, local_alias.clone()); - info.aggr.lagg_alias.insert(info.aggr.kind, local_alias); - } - } - - // Because of borrow checker we need to remove references to Plan from map - let mut unique_grouping_exprs_for_local_stage: OrderedMap< - NodeId, - Rc<String>, - RepeatableState, - > = OrderedMap::with_capacity_and_hasher( - unique_grouping_exprs_for_local_stage_full.len(), - RepeatableState, - ); - for (gr_expr, name) in unique_grouping_exprs_for_local_stage_full.iter() { - unique_grouping_exprs_for_local_stage.insert(gr_expr.id, name.clone()) - } - - let mut alias_to_pos: HashMap<Rc<String>, usize> = HashMap::new(); - // add grouping expressions to local projection - for (pos, (gr_expr, local_alias)) in - unique_grouping_exprs_for_local_stage.iter().enumerate() - { - let new_alias = self.nodes.add_alias(local_alias, *gr_expr)?; - output_cols.push(new_alias); - alias_to_pos.insert(local_alias.clone(), pos); - } - - let mut local_aliases: LocalAliasesMap = - HashMap::with_capacity(unique_grouping_exprs_for_local_stage.len()); - let mut grouping_positions: Vec<usize> = Vec::with_capacity(grouping_exprs.len()); - - // Note: we need to iterate only over grouping expressions that were present - // in original user query here. We must not use the grouping expressions - // that come from distinct aggregates. 
This is because they are handled separately: - // local aliases map is needed only for GroupBy expressions in the original query and - // grouping positions are used to create a Motion later, which should take into account - // only positions from GroupBy expressions in the original user query. - for expr_id in grouping_exprs.iter() { - if let Some(local_alias) = unique_grouping_exprs_for_local_stage.remove(expr_id) { - local_aliases.insert(*expr_id, local_alias.clone()); - if let Some(pos) = alias_to_pos.get(&local_alias) { - grouping_positions.push(*pos); - } else { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!("missing position for local GroupBy column with local alias: {local_alias}")) - )); - } - } else { - return Err(SbroadError::Invalid( - Entity::Node, - Some(format_smolstr!("invalid map with unique grouping expressions. Could not find grouping expression with id: {expr_id:?}")))); - } - } - - let child_id = self - .add_distinct_aggregates_to_local_groupby(upper_id, grouping_exprs_from_aggregates)?; - Ok((local_aliases, child_id, grouping_positions)) - } - - /// Adds aggregates columns in `output_cols` for local `Projection` - /// - /// This function collects local aggregates from each `AggrInfo`, - /// then it removes duplicates from them using `AggregateSignature`. - /// Next, it creates for each unique aggregate local alias and column. - #[allow(clippy::mutable_key_type)] - fn add_local_aggregates( - &mut self, - aggr_infos: &mut [AggrInfo], - output_cols: &mut Vec<NodeId>, - ) -> Result<(), SbroadError> { - // Aggregate expressions can appear in `Projection`, `Having`, `OrderBy`, if the - // same expression appears in different places, we must not calculate it separately: - // `select sum(a) from t group by b having sum(a) > 10` - // Here `sum(a)` appears both in projection and having, so we need to calculate it only once. 
- let mut unique_local_aggregates: HashSet<AggregateSignature, RepeatableState> = - HashSet::with_hasher(RepeatableState); - for pos in 0..aggr_infos.len() { - let info = aggr_infos.get(pos).ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues(format_smolstr!( - "invalid idx of aggregate infos ({pos})" - )) - })?; - if info.is_distinct { - continue; - } - let arguments = { - if let Expression::StableFunction(StableFunction { children, .. }) = - self.get_expression_node(info.aggr.fun_id)? - { - children - } else { - return Err(SbroadError::Invalid( - Entity::Aggregate, - Some(format_smolstr!("invalid fun_id: {:?}", info.aggr.fun_id)), - )); - } - }; - for kind in info.aggr.kind.get_local_aggregates_kinds() { - let mut signature = AggregateSignature { - kind, - arguments, - plan: self, - local_alias: None, - }; - if let Some(sig) = unique_local_aggregates.get(&signature) { - if let Some(alias) = &sig.local_alias { - let info = aggr_infos.get_mut(pos).ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues(format_smolstr!( - "invalid idx of aggregate infos ({pos})" - )) - })?; - info.aggr.lagg_alias.insert(kind, alias.clone()); - } else { - return Err(SbroadError::Invalid( - Entity::AggregateSignature, - Some("no local alias".into()), - )); - } - } else { - let info = aggr_infos.get_mut(pos).ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues(format_smolstr!( - "invalid idx of aggregate infos ({pos})" - )) - })?; - let alias = Rc::new(generate_local_alias_for_aggr( - &kind, - &format_smolstr!("{}", info.aggr.fun_id), - )); - info.aggr.lagg_alias.insert(kind, alias.clone()); - signature.local_alias = Some(alias); - unique_local_aggregates.insert(signature); - } + for (new_gr_expr_pos, (_, info)) in + groupby_info.grouping_expr_to_alias_map.iter().enumerate() + { + let new_gr_expr_id = *local_projection_output + .get(new_gr_expr_pos) + .expect("Grouping expression should be found under local Projection output"); + + // TODO: It may come from other nodes!!! 
+                // For our case grouping expr b is coming from an aggregate under Having and not GroupBy.
+                // It may also come from Projection.
+                self.replace_parent_in_subtree(
+                    new_gr_expr_id,
+                    Some(info.parent_rel_id),
+                    Some(proj_id),
+                )?;
             }
-        }
-
-        // add non-distinct aggregates to local projection
-        let local_aggregates: Result<Vec<LocalAggrInfo>, SbroadError> = unique_local_aggregates
-            .into_iter()
-            .map(
-                |x| -> Result<(AggregateKind, Vec<NodeId>, Rc<String>), SbroadError> {
-                    match x.get_alias() {
-                        Ok(s) => Ok((x.kind, x.arguments.clone(), s)),
-                        Err(e) => Err(e),
-                    }
-                },
-            )
-            .collect();
-        for (kind, arguments, local_alias) in local_aggregates? {
-            let alias_id = self.create_local_aggregate(kind, &arguments, local_alias.as_str())?;
-            output_cols.push(alias_id);
-        }
+        };
+        self.set_distribution(proj_output)?;
 
-        Ok(())
+        Ok(proj_id)
     }
 
-    /// Add final `GroupBy` node in case `grouping_exprs` are not empty
-    ///
-    /// # Arguments
-    /// * `child_id` - id if relational node that will the child of `GroupBy`
-    /// * `grouping_exprs` - list of grouping expressions ids (which does not include
-    ///   grouping expressions from distinct arguments)
-    /// * `local_aliases_map` - map between expression from `GroupBy` to alias used
-    ///   at local stage
-    ///
-    /// # Returns
-    /// - if `GroupBy` node was created, return its id
-    /// - if `GroupBy` node was not created, return `child_id`
+    /// Add final `GroupBy` node in case `grouping_exprs` are not empty.
fn add_final_groupby( &mut self, child_id: NodeId, - grouping_exprs: &Vec<NodeId>, - local_aliases_map: &LocalAliasesMap, + groupby_info: &GroupByInfo, ) -> Result<NodeId, SbroadError> { - if grouping_exprs.is_empty() { - // no GroupBy in the original query, nothing to do - return Ok(child_id); - } + let grouping_exprs = &groupby_info.grouping_exprs; + let local_aliases_map = &groupby_info.reduce_info.local_aliases_map; + let mut gr_exprs: Vec<NodeId> = Vec::with_capacity(grouping_exprs.len()); - let child_map = ColumnPositionMap::new(self, child_id)?; + let child_map: ColumnPositionMap = ColumnPositionMap::new(self, child_id)?; let mut nodes = Vec::with_capacity(grouping_exprs.len()); for expr_id in grouping_exprs { let Some(local_alias) = local_aliases_map.get(expr_id) else { @@ -1157,6 +784,8 @@ impl Plan { gr_exprs.push(new_col_id); } let output = self.add_row_for_output(child_id, &[], true, None)?; + + // Because GroupBy node lies in the Arena64. let final_id = self.nodes.next_id(ArenaType::Arena64); for col in &gr_exprs { self.replace_parent_in_subtree(*col, None, Some(final_id))?; @@ -1164,7 +793,6 @@ impl Plan { let final_groupby = GroupBy { gr_exprs, children: vec![child_id], - is_final: true, output, }; self.replace_parent_in_subtree(output, None, Some(final_id))?; @@ -1177,20 +805,23 @@ impl Plan { /// references to local aliases. 
/// /// For example: - /// original query: `select a + b from t group by a + b` + /// original query: `select a + b as user_alias from t group by a + b` /// map query: `select a + b as l1 from t group by a + b` `l1` is local alias /// reduce query: `select l1 as user_alias from tmp_space group by l1` - /// In above example this function will replace `a+b` expression in final `Projection` + /// In above example this function will replace `a + b` expression in final `Projection` #[allow(clippy::too_many_lines)] fn patch_grouping_expressions( &mut self, - local_aliases_map: &LocalAliasesMap, - map: GroupbyExpressionsMap, + groupby_info: &GroupByInfo, ) -> Result<(), SbroadError> { + println!("Enter patch_grouping_expressions"); + let local_aliases_map = &groupby_info.reduce_info.local_aliases_map; + let gr_exprs_map = &groupby_info.gr_exprs_map; + type RelationalID = NodeId; type GroupByExpressionID = NodeId; type ExpressionID = NodeId; - type ExpressionParent = Option<NodeId>; + type ExpressionParent = NodeId; // Map of { Relation -> vec![( // expr_id under group by // expr_id of the same expr under other relation (e.g. 
Projection) @@ -1199,14 +830,14 @@ impl Plan { type ParentExpressionMap = HashMap<RelationalID, Vec<(GroupByExpressionID, ExpressionID, ExpressionParent)>>; let map: ParentExpressionMap = { - let mut new_map: ParentExpressionMap = HashMap::with_capacity(map.len()); - for (groupby_expr_id, locations) in map { + let mut new_map: ParentExpressionMap = HashMap::with_capacity(gr_exprs_map.len()); + for (groupby_expr_id, locations) in gr_exprs_map { for location in locations { - let rec = (groupby_expr_id, location.expr, location.parent_expr); - if let Some(u) = new_map.get_mut(&location.rel) { + let rec = (*groupby_expr_id, location.expr_id, location.parent_expr_id); + if let Some(u) = new_map.get_mut(&location.rel_id) { u.push(rec); } else { - new_map.insert(location.rel, vec![rec]); + new_map.insert(location.rel_id, vec![rec]); } } } @@ -1222,9 +853,10 @@ impl Plan { "expected relation node ({rel_id:?}) to have children!" )) })?; + println!("Before call ColumnPositionMap::new"); let alias_to_pos_map = ColumnPositionMap::new(self, child_id)?; let mut nodes = Vec::with_capacity(group.len()); - for (gr_expr_id, expr_id, parent) in group { + for (gr_expr_id, expr_id, parent_expr_id) in group { let Some(local_alias) = local_aliases_map.get(&gr_expr_id) else { return Err(SbroadError::Invalid( Entity::Plan, @@ -1252,38 +884,11 @@ impl Plan { col_type, asterisk_source: None, }; - nodes.push((parent, expr_id, gr_expr_id, new_ref)); + nodes.push((parent_expr_id, expr_id, new_ref)); } - for (parent, expr_id, gr_expr_id, node) in nodes { + for (parent_expr_id, expr_id, node) in nodes { let ref_id = self.nodes.push(node.into()); - if let Some(parent_expr_id) = parent { - self.replace_expression(parent_expr_id, expr_id, ref_id)?; - } else { - match self.get_mut_relation_node(rel_id)? 
{ - MutRelational::Projection(_) => { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!( - "{} {gr_expr_id:?} {} {expr_id:?} {}", - "invalid mapping between group by expression", - "and projection one: expression", - "has no parent", - )), - )) - } - MutRelational::Having(Having { filter, .. }) => { - *filter = ref_id; - } - _ => { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!( - "unexpected node in Reduce stage: {rel_id:?}" - )), - )) - } - } - } + self.replace_expression(parent_expr_id, expr_id, ref_id)?; } } Ok(()) @@ -1309,160 +914,100 @@ impl Plan { /// * `finals_child_id` - id of a relational node right after `finals` in the plan. In case /// original query had `GroupBy`, this will be final `GroupBy` id. /// * `local_aliases_map` - map between grouping expressions ids and corresponding local aliases. - /// * `aggr_infos` - list of metadata about aggregates - /// * `gr_expr_map` - map between grouping expressions in `GroupBy` and grouping expressions - /// used in `finals`. + /// * `aggrs` - list of metadata about aggregates fn patch_finals( &mut self, finals: &[NodeId], finals_child_id: NodeId, - local_aliases_map: &LocalAliasesMap, - aggr_infos: &Vec<AggrInfo>, - gr_expr_map: GroupbyExpressionsMap, + aggrs: &Vec<Aggregate>, + groupby_info: &Option<GroupByInfo>, ) -> Result<(), SbroadError> { - // After we added a Map stage, we need to update output - // of nodes in Reduce stage - if let Some(last) = finals.last() { - if let Some(first) = self.get_mut_relation_node(*last)?.mut_children().get_mut(0) { - *first = finals_child_id; - } - } + // Update relational child of the last final. + let last_final_id = finals.last().expect("last final node should exist"); + *self + .get_mut_relation_node(*last_final_id)? + .mut_children() + .get_mut(0) + .expect("last final node should have child") = finals_child_id; + + // After we added a Map stage, we need to + // update output of Having in Reduce stage. 
for node_id in finals.iter().rev() { let node = self.get_relation_node(*node_id)?; match node { - // Projection node is the top node in finals: its aliases - // must not be changed (because those are user aliases), so - // nothing to do here - Relational::Projection(_) => {} - Relational::NamedWindows(_) => { - unreachable!("NamedWindows node should not be in finals") + Relational::Projection(_) => { + // Projection node is the top node in finals: its aliases + // must not be changed (because those are user aliases), so + // nothing to do here. } Relational::Having(Having { children, .. }) => { - let child_id = *children.first().ok_or_else(|| { - SbroadError::Invalid( - Entity::Node, - Some(format_smolstr!("Having ({node_id:?}) has no children!")), - ) - })?; + let child_id = *children.first().expect("Having should have a child"); let output = self.add_row_for_output(child_id, &[], true, None)?; *self.get_mut_relation_node(*node_id)?.mut_output() = output; self.replace_parent_in_subtree(output, None, Some(*node_id))?; } - _ => { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!("Unexpected node in reduce stage: {node:?}")), - )) - } + _ => unreachable!("Unexpected node in reduce stage: {node:?}"), } } - self.patch_grouping_expressions(local_aliases_map, gr_expr_map)?; - let mut parent_to_infos: HashMap<NodeId, Vec<AggrInfo>> = + if let Some(groupby_info) = groupby_info { + self.patch_grouping_expressions(groupby_info)?; + } + + let mut parent_to_aggrs: HashMap<NodeId, Vec<Aggregate>> = HashMap::with_capacity(finals.len()); - for info in aggr_infos { - if let Some(v) = parent_to_infos.get_mut(&info.parent_rel) { - v.push(info.clone()); + for aggr in aggrs { + if let Some(v) = parent_to_aggrs.get_mut(&aggr.parent_rel) { + v.push(aggr.clone()); } else { - parent_to_infos.insert(info.parent_rel, vec![info.clone()]); + parent_to_aggrs.insert(aggr.parent_rel, vec![aggr.clone()]); } } - for (parent, infos) in parent_to_infos { - let child_id = { - 
let children = self.get_relational_children(parent)?; - *children.get(0).ok_or_else(|| { - SbroadError::Invalid( - Entity::Node, - Some(format_smolstr!( - "patch aggregates: rel node ({parent:?}) has no children!" - )), - ) - })? - }; - let alias_to_pos_map = ColumnPositionMap::new(self, child_id)?; - let mut position_kinds = Vec::with_capacity(infos.len()); - for info in &infos { - position_kinds.push( - info.aggr - .get_position_kinds(&alias_to_pos_map, info.is_distinct)?, - ); - } - for (info, pos_kinds) in infos.into_iter().zip(position_kinds) { - let fun_expr = self.get_expression_node(info.aggr.fun_id)?; - let fun_type = fun_expr.calculate_type(self)?; - let final_expr = info.aggr.create_final_aggregate_expr( - parent, - self, - fun_type, - pos_kinds, - info.is_distinct, - )?; - if let Some(parent_expr) = info.parent_expr { - self.replace_expression(parent_expr, info.aggr.fun_id, final_expr)?; - } else { - let node = self.get_mut_relation_node(parent)?; - return Err(SbroadError::Invalid( - Entity::Aggregate, - Some(format_smolstr!( - "aggregate info for {node:?} that hat no parent! Info: {info:?}" - )), - )); - } + for (parent, aggrs) in parent_to_aggrs { + let child_id = *self + .get_relational_children(parent)? + .get(0) + .expect("final relational node should have a child"); + + // AggrKind -> LocalAlias -> Pos in the output + let alias_to_pos_map: ColumnPositionMap = ColumnPositionMap::new(self, child_id)?; + for aggr in aggrs { + // Position in the output with aggregate kind. 
+ let pos_kinds = aggr.get_position_kinds(&alias_to_pos_map)?; + let final_expr = aggr.create_final_aggregate_expr(self, pos_kinds)?; + self.replace_expression(aggr.parent_expr, aggr.fun_id, final_expr)?; } } Ok(()) } - fn add_motion_to_2stage( + fn add_motion_to_two_stage( &mut self, - grouping_positions: &[usize], - motion_parent: NodeId, + groupby_info: &Option<GroupByInfo>, + finals_child_id: NodeId, finals: &[NodeId], ) -> Result<(), SbroadError> { - let proj_id = *finals.first().ok_or_else(|| { - SbroadError::Invalid(Entity::Plan, Some("no nodes in Reduce stage!".into())) - })?; - if let Relational::Projection(_) = self.get_relation_node(proj_id)? { + let final_proj_id = *finals.first().expect("finals should not be empty"); + if let Relational::Projection(_) = self.get_relation_node(final_proj_id)? { } else { - return Err(SbroadError::Invalid( - Entity::Plan, - Some("expected Projection as first node in reduce stage!".into()), - )); + unreachable!("Projection should be the first node in reduce stage") } - if grouping_positions.is_empty() { - // no GroupBy - let last_final_id = *finals.last().ok_or_else(|| { - SbroadError::Invalid(Entity::Plan, Some("Reduce stage has no nodes!".into())) - })?; - let mut strategy = Strategy::new(last_final_id); - strategy.add_child(motion_parent, MotionPolicy::Full, Program::default()); - self.create_motion_nodes(strategy)?; - self.set_dist(self.get_relational_output(proj_id)?, Distribution::Single)?; - } else { - // we have GroupBy, then finals_child_id is final GroupBy - let child_id = if let Relational::GroupBy(GroupBy { children, .. }) = - self.get_relation_node(motion_parent)? - { - *children.first().ok_or_else(|| { - SbroadError::Invalid( - Entity::Node, - Some(format_smolstr!( - "final GroupBy ({motion_parent:?}) has no children!" - )), - ) - })? 
- } else { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!( - "expected to have GroupBy under reduce nodes on id: {motion_parent:?}" - )), - )); - }; - let mut strategy = Strategy::new(motion_parent); + // In case we have local GroupBy, then `finals_child_id`` is local GroupBy. + let finals_child_node = self.get_relation_node(finals_child_id)?; + let has_local_group_by = matches!(finals_child_node, Relational::GroupBy(_)); + + if let Relational::GroupBy(GroupBy { children, .. }) = finals_child_node { + let final_group_by_child_id = *children.first().unwrap_or_else(|| { + unreachable!("final GroupBy ({finals_child_id:?}) should have children") + }); + + let groupby_info = groupby_info.as_ref().expect("GroupBy should exists"); + let grouping_positions: &Vec<usize> = &groupby_info.reduce_info.grouping_positions; + + let mut strategy = Strategy::new(finals_child_id); strategy.add_child( - child_id, + final_group_by_child_id, MotionPolicy::Segment(MotionKey { targets: grouping_positions .iter() @@ -1475,95 +1020,40 @@ impl Plan { // When we created final GroupBy we didn't set its distribution, because its // actual child (Motion) wasn't created yet. - self.set_distribution(self.get_relational_output(motion_parent)?)?; - } - Ok(()) - } + self.set_distribution(self.get_relational_output(finals_child_id)?)?; + } else { + // No local GroupBy. + let last_final_id = *finals.last().unwrap(); + let mut strategy = Strategy::new(last_final_id); + strategy.add_child(finals_child_id, MotionPolicy::Full, Program::default()); + self.create_motion_nodes(strategy)?; - /// Adds 2-stage aggregation and returns `true` if there are any aggregate - /// functions or `GroupBy` is present. Otherwise, returns `false` and - /// does nothing. 
- /// - /// # Errors - /// - failed to create local `GroupBy` node - /// - failed to create local `Projection` node - /// - failed to create `SQ` node - /// - failed to change final `GroupBy` child to `SQ` - /// - failed to update expressions in final `Projection` - pub fn add_two_stage_aggregation( - &mut self, - final_proj_id: NodeId, - ) -> Result<bool, SbroadError> { - let (finals, upper) = self.split_group_by(final_proj_id)?; - let mut aggr_infos = self.collect_aggregates(&finals)?; - let has_aggregates = !aggr_infos.is_empty(); - let (upper, grouping_exprs, gr_expr_map) = - self.collect_grouping_expressions(upper, &finals, has_aggregates)?; - if grouping_exprs.is_empty() && aggr_infos.is_empty() { - return Ok(false); + self.set_dist( + self.get_relational_output(final_proj_id)?, + Distribution::Single, + )?; } - // Check for group by on bucket_id column - // in that case groupby can be done locally. - if !grouping_exprs.is_empty() { - // let shard_col_info = self.track_shard_column_pos(final_proj_id)?; - for expr_id in &grouping_exprs { - let Expression::Reference(Reference { position, .. }) = - self.get_expression_node(*expr_id)? - else { - continue; - }; - let child_id = self.get_relational_from_reference_node(*expr_id)?; - let mut context = self.context_mut(); - if let Some(shard_positions) = - context.get_shard_columns_positions(child_id, self)? - { - if shard_positions[0] == Some(*position) - || shard_positions[1] == Some(*position) - { - return Ok(false); - } - } - } + // Set distribution to final outputs (except Projection). + for node_id in finals.iter().skip(1).rev() { + self.set_distribution(self.get_relational_output(*node_id)?)?; + } + if has_local_group_by { + // In case we've added final GroupBy we set distribution based on it. 
+ self.set_distribution(self.get_relational_output(final_proj_id)?)?; } - let (local_proj_id, grouping_positions, local_aliases_map) = - self.add_local_projection(upper, &mut aggr_infos, &grouping_exprs)?; - - self.set_distribution(self.get_relational_output(local_proj_id)?)?; - let finals_child_id = - self.add_final_groupby(local_proj_id, &grouping_exprs, &local_aliases_map)?; - - self.patch_finals( - &finals, - finals_child_id, - &local_aliases_map, - &aggr_infos, - gr_expr_map, - )?; - self.add_motion_to_2stage(&grouping_positions, finals_child_id, &finals)?; + Ok(()) + } + /// Create Motion nodes for scalar subqueries present under Having node. + fn fix_subqueries_under_having(&mut self, finals: &[NodeId]) -> Result<(), SbroadError> { let mut having_id: Option<NodeId> = None; - // skip Projection for node_id in finals.iter().skip(1).rev() { - self.set_distribution(self.get_relational_output(*node_id)?)?; if let Relational::Having(_) = self.get_relation_node(*node_id)? { having_id = Some(*node_id); } } - - if matches!( - self.get_relation_node(finals_child_id)?, - Relational::GroupBy(_) - ) { - self.set_distribution(self.get_relational_output(final_proj_id)?)?; - } else { - self.set_dist( - self.get_relational_output(final_proj_id)?, - Distribution::Single, - )?; - } - if let Some(having_id) = having_id { if let Relational::Having(Having { filter, output, .. }) = self.get_relation_node(having_id)? @@ -1577,6 +1067,163 @@ impl Plan { self.try_dist_from_subqueries(having_id, output)?; } } + Ok(()) + } + + /// Adds 2-stage aggregation and returns `true` if there are any aggregate + /// functions or `GroupBy` is present. Otherwise, returns `false` and + /// does nothing. 
+ pub fn add_two_stage_aggregation( + &mut self, + final_proj_id: NodeId, + ) -> Result<bool, SbroadError> { + let (finals, mut upper_id) = self.split_group_by(final_proj_id)?; + let mut groupby_info = + if matches!(self.get_relation_node(upper_id)?, Relational::GroupBy(_)) { + // In case user defined GroupBy in initial query. + // + // Example: `select a from t group by a`. + Some(GroupByInfo::new(upper_id)) + } else { + None + }; + + let mut aggrs = self.collect_aggregates(&finals)?; + + if groupby_info.is_none() && aggrs.is_empty() { + if let Some(groupby_id) = self.add_group_by_for_distinct(final_proj_id, upper_id)? { + // In case aggregates or GroupBy are present "distinct" qualifier under + // Projection doesn't add any new features to the plan. Otherwise, we should add + // a new GroupBy node for a local map stage. + // + // Example: `select distinct a, b + 42 from t`. + upper_id = groupby_id; + groupby_info = Some(GroupByInfo::new(upper_id)); + } else { + // Query doesn't contain GroupBy, aggregates or "distinct" qualifier. + // + // Example: `select a, b + 42 from t`. + return Ok(false); + } + } + + if groupby_info.is_none() { + self.check_refs_out_of_aggregates(&finals)?; + } + + let distinct_grouping_exprs = + self.collect_grouping_exprs_from_distinct_aggrs(&mut aggrs)?; + if groupby_info.is_none() && !distinct_grouping_exprs.is_empty() { + // GroupBy doesn't exist and we have to create it just for + // distinct aggregates. + // + // Example: `select sum(distinct a) from t` + + // grouping_exprs will be set few lines below. + let groupby_id = self.add_groupby(upper_id, &[], None)?; + upper_id = groupby_id; + groupby_info = Some(GroupByInfo::new(upper_id)); + } + + // Index for generating local grouping expressions aliases. + let mut local_alias_index = 1; + // Map of { grouping_expression -> (local_alias + parent_rel_id) }. 
+ let mut unique_grouping_expr_to_alias_map: OrderedMap< + GroupingExpression, + LocalProjectionGroupingExprInfo, + RepeatableState, + > = OrderedMap::with_capacity_and_hasher(GR_EXPR_CAPACITY, RepeatableState); + // Grouping expressions for local GroupBy. + let mut grouping_exprs_local = Vec::with_capacity(GR_EXPR_CAPACITY); + if let Some(groupby_info) = groupby_info.as_mut() { + // Leave only unique expressions under local GroupBy. + let gr_exprs = self.get_grouping_exprs(groupby_info.id)?; + for gr_expr in gr_exprs { + let local_alias = grouping_expr_local_alias(local_alias_index); + let new_expr = GroupingExpression::new(*gr_expr, self); + if !unique_grouping_expr_to_alias_map.contains_key(&new_expr) { + unique_grouping_expr_to_alias_map.insert( + new_expr, + LocalProjectionGroupingExprInfo { + local_alias, + parent_rel_id: groupby_info.id, + }, + ); + local_alias_index += 1; + } + } + for (expr, _) in unique_grouping_expr_to_alias_map.iter() { + let expr_id = expr.id; + grouping_exprs_local.push(expr_id); + groupby_info.grouping_exprs.push(expr_id); + } + + if !groupby_info.grouping_exprs.is_empty() + && self.check_bucket_id_under_group_by(&groupby_info.grouping_exprs)? + { + return Ok(false); + } + } + + // Set local aggregates aliases for distinct aggregatees. For non-distinct aggregates + // they would be set under `add_local_aggregates`. + for (gr_expr, aggr) in distinct_grouping_exprs { + let new_expr = GroupingExpression::new(gr_expr, self); + if let Some(info) = unique_grouping_expr_to_alias_map.get(&new_expr) { + aggr.lagg_aliases + .insert(aggr.kind, info.local_alias.clone()); + } else { + let local_alias = grouping_expr_local_alias(local_alias_index); + local_alias_index += 1; + aggr.lagg_aliases.insert(aggr.kind, local_alias.clone()); + + // Add expressions used as arguments to distinct aggregates to local `GroupBy`. 
+ // + // E.g: For query below, we should add b*b to local `GroupBy` + // `select a, sum(distinct b*b), count(c) from t group by a` + // Map: `select a as l1, b*b as l2, count(c) as l3 from t group by a, b, b*b` + // Reduce: `select l1, sum(distinct l2), sum(l3) from tmp_space group by l1` + grouping_exprs_local.push(gr_expr); + unique_grouping_expr_to_alias_map.insert( + new_expr, + LocalProjectionGroupingExprInfo { + local_alias, + parent_rel_id: aggr.parent_rel, + }, + ); + } + } + + if let Some(groupby_info) = groupby_info.as_mut() { + for (expr, info) in unique_grouping_expr_to_alias_map.iter() { + groupby_info + .grouping_expr_to_alias_map + .insert(expr.id, info.clone()); + } + + self.set_grouping_exprs(groupby_info.id, grouping_exprs_local)?; + self.fill_gr_exprs_map(&finals, groupby_info)?; + } + + if let Some(groupby_info) = &groupby_info { + self.set_distribution(self.get_relational_output(groupby_info.id)?)?; + } + + let local_proj_id = self.add_local_projection(upper_id, &mut aggrs, &mut groupby_info)?; + let finals_child_id = if let Some(groupby_info) = groupby_info.as_ref() { + if groupby_info.grouping_exprs.is_empty() { + local_proj_id + } else { + self.add_final_groupby(local_proj_id, groupby_info)? 
+ } + } else { + local_proj_id + }; + self.patch_finals(&finals, finals_child_id, &aggrs, &groupby_info)?; + + self.add_motion_to_two_stage(&groupby_info, finals_child_id, &finals)?; + + self.fix_subqueries_under_having(&finals)?; Ok(true) } diff --git a/sbroad/sbroad-core/src/ir/transformation/redistribution/tests.rs b/sbroad/sbroad-core/src/ir/transformation/redistribution/tests.rs index 32fdbdf8b9..750614e5b9 100644 --- a/sbroad/sbroad-core/src/ir/transformation/redistribution/tests.rs +++ b/sbroad/sbroad-core/src/ir/transformation/redistribution/tests.rs @@ -249,9 +249,9 @@ fn test_slices_2() { let plan = sql_to_optimized_ir(query, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("count_5596"::unsigned))::unsigned -> "col_1") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1") motion [policy: full] - projection (count(("t2"."e"::unsigned))::unsigned -> "count_5596") + projection (count(("t2"."e"::unsigned))::unsigned -> "count_1") join on true::boolean scan projection ("t2"."f"::unsigned -> "f") diff --git a/sbroad/sbroad-core/src/utils.rs b/sbroad/sbroad-core/src/utils.rs index 8f913890f9..5ac66195f6 100644 --- a/sbroad/sbroad-core/src/utils.rs +++ b/sbroad/sbroad-core/src/utils.rs @@ -132,6 +132,10 @@ impl<K: Clone + Hash + Eq, V: Clone, S: BuildHasher> OrderedMap<K, V, S> { self.map.get(key) } + pub fn contains_key(&self, key: &K) -> bool { + self.map.contains_key(key) + } + pub fn remove(&mut self, key: &K) -> Option<V> { self.order.retain(|(k, _)| k != key); self.map.remove(key) @@ -165,9 +169,9 @@ impl<'set, V: Clone + Hash + Eq, S: BuildHasher> Iterator for OrderedSetIterator } impl<V: Clone + Hash + Eq, S: BuildHasher> OrderedSet<V, S> { - pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self { + pub fn with_hasher(hasher: S) -> Self { Self { - map: OrderedMap::<V, (), S>::with_capacity_and_hasher(capacity, hasher), + map: OrderedMap::<V, (), S>::with_hasher(hasher), } } @@ -175,6 +179,10 @@ 
impl<V: Clone + Hash + Eq, S: BuildHasher> OrderedSet<V, S> { self.map.len() } + pub fn contains_key(&self, key: &V) -> bool { + self.map.contains_key(key) + } + pub fn is_empty(&self) -> bool { self.map.is_empty() } -- GitLab From f3838a41da584134108698b05e008b549628e519 Mon Sep 17 00:00:00 2001 From: EmirVildanov <reddog201030@gmail.com> Date: Thu, 20 Mar 2025 15:00:48 +0300 Subject: [PATCH 3/4] chore: remove useless debug info from sbroad unit tests --- .../src/executor/tests/frontend.rs | 1 - .../sbroad-core/src/frontend/sql/ir/tests.rs | 53 +------------------ .../src/frontend/sql/ir/tests/coalesce.rs | 1 - .../src/frontend/sql/ir/tests/funcs.rs | 1 - .../src/frontend/sql/ir/tests/insert.rs | 1 - .../src/frontend/sql/ir/tests/join.rs | 1 - .../src/frontend/sql/ir/tests/limit.rs | 1 - .../src/frontend/sql/ir/tests/trim.rs | 1 - .../src/frontend/sql/ir/tests/union.rs | 2 - .../src/frontend/sql/ir/tests/update.rs | 1 - sbroad/sbroad-core/src/ir/explain/tests.rs | 12 ----- .../src/ir/explain/tests/cast_constants.rs | 1 - .../src/ir/explain/tests/concat.rs | 4 -- .../src/ir/explain/tests/delete.rs | 6 --- .../src/ir/explain/tests/query_explain.rs | 3 -- 15 files changed, 2 insertions(+), 87 deletions(-) diff --git a/sbroad/sbroad-core/src/executor/tests/frontend.rs b/sbroad/sbroad-core/src/executor/tests/frontend.rs index b57745298c..7d8a6ce666 100644 --- a/sbroad/sbroad-core/src/executor/tests/frontend.rs +++ b/sbroad/sbroad-core/src/executor/tests/frontend.rs @@ -2,7 +2,6 @@ use super::*; use crate::executor::engine::mock::RouterRuntimeMock; use pretty_assertions::assert_eq; -use smol_str::format_smolstr; #[test] fn front_valid_sql1() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs index 71be50a2c3..671b70ee40 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs @@ -1296,7 +1296,6 @@ fn front_sql_groupby() { let input = r#"SELECT 
"identification_number", "product_code" FROM "hash_testing" group by "identification_number", "product_code""#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::integer -> "identification_number", "gr_expr_2"::string -> "product_code") group by ("gr_expr_1"::integer, "gr_expr_2"::string) output: ("gr_expr_1"::integer -> "gr_expr_1", "gr_expr_2"::string -> "gr_expr_2") @@ -1319,7 +1318,6 @@ fn front_sql_groupby_less_cols_in_proj() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::integer -> "identification_number") group by ("gr_expr_1"::integer, "gr_expr_2"::boolean) output: ("gr_expr_1"::integer -> "gr_expr_1", "gr_expr_2"::boolean -> "gr_expr_2") @@ -1368,7 +1366,6 @@ fn front_sql_groupby_union_2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all projection ("hash_testing"."identification_number"::integer -> "identification_number") @@ -1400,7 +1397,6 @@ fn front_sql_groupby_join_1() { "#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::string -> "product_code", "gr_expr_2"::boolean -> "product_units") group by ("gr_expr_1"::string, "gr_expr_2"::boolean) output: ("gr_expr_1"::string -> "gr_expr_1", "gr_expr_2"::boolean -> "gr_expr_2") @@ -1475,7 +1471,6 @@ fn front_sql_join() { "#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); // TODO: For the hash function in the cartrisge runtime we can apply // `motion [policy: segment([ref("id")])]` instead of the `motion [policy: full]`. 
@@ -1503,7 +1498,6 @@ fn front_sql_groupby_insert() { SELECT "b", "d" FROM "t" group by "b", "d" ON CONFLICT DO FAIL"#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" insert "t" on conflict: fail motion [policy: segment([value(NULL), ref("d")])] @@ -1614,7 +1608,6 @@ fn front_sql_total_aggregate() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (total(("total_1"::double))::double -> "col_1", total(distinct ("gr_expr_1"::double))::double -> "col_2") motion [policy: full] @@ -1633,7 +1626,6 @@ fn front_sql_min_aggregate() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (min(("min_1"::unsigned))::unsigned -> "col_1", min(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_2") motion [policy: full] @@ -1652,7 +1644,6 @@ fn front_sql_max_aggregate() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (max(("max_1"::unsigned))::unsigned -> "col_1", max(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_2") motion [policy: full] @@ -1671,7 +1662,6 @@ fn front_sql_group_concat_aggregate() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (group_concat(("group_concat_1"::string))::string -> "col_1", group_concat(distinct ("gr_expr_1"::string))::string -> "col_2") motion [policy: full] @@ -1690,7 +1680,6 @@ fn front_sql_group_concat_aggregate2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection 
(group_concat(("group_concat_1"::string, ' '::string))::string -> "col_1", group_concat(distinct ("gr_expr_1"::string))::string -> "col_2") motion [policy: full] @@ -1740,7 +1729,6 @@ fn front_sql_count_asterisk1() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (sum(("count_1"::unsigned))::unsigned -> "col_1", sum(("count_1"::unsigned))::unsigned -> "col_2") motion [policy: full] @@ -1758,7 +1746,6 @@ fn front_sql_count_asterisk2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (sum(("count_1"::unsigned))::unsigned -> "col_1", "gr_expr_1"::unsigned -> "b") group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") @@ -1814,7 +1801,6 @@ fn front_sql_aggregates_with_distinct1() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "b", count(distinct ("gr_expr_2"::unsigned))::unsigned -> "col_1", count(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_2") group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") @@ -1835,7 +1821,6 @@ fn front_sql_aggregates_with_distinct2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "b", sum(distinct ("gr_expr_2"::decimal))::decimal -> "col_1") group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") @@ -1855,7 +1840,6 @@ fn front_sql_aggregates_with_distinct3() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); 
insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (sum(distinct ("gr_expr_1"::decimal))::decimal -> "col_1") motion [policy: full] @@ -1973,7 +1957,6 @@ fn front_sql_pg_style_params3() { let plan = sql_to_optimized_ir(input, vec![Value::Unsigned(42)]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "col_1") having ROW(sum(("count_1"::unsigned))::unsigned) > ROW(42::unsigned) @@ -2111,7 +2094,6 @@ fn front_sql_union_single_left() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all projection ("t"."a"::unsigned -> "a") @@ -2137,7 +2119,6 @@ fn front_sql_union_single_right() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all motion [policy: segment([ref("col_1")])] @@ -2163,7 +2144,6 @@ fn front_sql_union_single_both() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all motion [policy: segment([ref("col_1")])] @@ -2188,7 +2168,6 @@ fn front_sql_insert_single() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" insert "t" on conflict: fail motion [policy: segment([value(NULL), ref("col_2")])] @@ -2210,7 +2189,6 @@ fn front_sql_except_single_right() { "#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" except projection ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b") @@ -2232,7 +2210,6 @@ fn front_sql_except_single_right() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); 
insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" except projection ("t"."b"::unsigned -> "b", "t"."a"::unsigned -> "a") @@ -2256,7 +2233,6 @@ fn front_sql_except_single_left() { "#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" except motion [policy: segment([ref("col_1"), ref("col_2")])] @@ -2305,7 +2281,6 @@ fn front_sql_groupby_expression() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "col_1") group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1") @@ -2326,7 +2301,6 @@ fn front_sql_groupby_expression2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned + ROW(sum(("count_1"::unsigned))::unsigned) -> "col_1") group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") @@ -2347,7 +2321,6 @@ fn front_sql_groupby_expression3() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "col_1", "gr_expr_2"::unsigned * ROW(sum(("sum_1"::decimal))::decimal) / ROW(sum(("count_2"::unsigned))::unsigned) -> "col_2") group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "count_2"::unsigned -> "count_2", "sum_1"::decimal -> "sum_1") @@ -2368,7 +2341,6 @@ fn front_sql_groupby_expression4() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "col_1", 
"gr_expr_2"::unsigned -> "a") group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") @@ -2449,7 +2421,6 @@ fn front_sql_left_join_single_left() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("t1"."a"::decimal -> "a", "t2"."b"::unsigned -> "b") left join on ROW("t1"."a"::decimal) = ROW("t2"."b"::unsigned) @@ -2479,7 +2450,6 @@ fn front_sql_left_join_single_left2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); // full motion should be under outer child insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("t1"."a"::decimal -> "a", "t2"."b"::unsigned -> "b") @@ -2510,7 +2480,6 @@ fn front_sql_left_join_single_both() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); // full motion should be under outer child insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("t1"."a"::decimal -> "a", "t2"."b"::unsigned -> "b") @@ -2568,7 +2537,8 @@ fn front_sql_having1() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); + println!("Formatted arena: {}", plan.formatted_arena().unwrap()); + insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "a", sum(("sum_1"::decimal))::decimal -> "col_1") having ROW("gr_expr_1"::unsigned) > ROW(1::unsigned) and ROW(sum(distinct ("gr_expr_2"::decimal))::decimal) > ROW(1::unsigned) @@ -2591,7 +2561,6 @@ fn front_sql_having2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (ROW(sum(("sum_1"::decimal))::decimal) * ROW(count(distinct ("gr_expr_1"::unsigned))::unsigned) -> "col_1", sum(("sum_1"::decimal))::decimal -> "col_2") having 
ROW(sum(distinct ("gr_expr_1"::decimal))::decimal) > ROW(1::unsigned) and ROW(sum(("sum_1"::decimal))::decimal) > ROW(1::unsigned) @@ -2613,7 +2582,6 @@ fn front_sql_having3() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (sum(("sum_1"::decimal))::decimal -> "col_1") having ROW(sum(("sum_1"::decimal))::decimal) > ROW(1::unsigned) @@ -2651,7 +2619,6 @@ fn front_sql_having_with_sq() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "sysFrom", sum(distinct ("gr_expr_2"::decimal))::decimal -> "sum", count(distinct ("gr_expr_2"::unsigned))::unsigned -> "count") having ROW($0) > ROW(count(distinct ("gr_expr_2"::unsigned))::unsigned) @@ -2699,7 +2666,6 @@ fn front_sql_having_with_sq_segment_motion() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "sysFrom", "gr_expr_2"::unsigned -> "sys_op", sum(distinct ("gr_expr_3"::decimal))::decimal -> "sum", count(distinct ("gr_expr_3"::unsigned))::unsigned -> "count") having ROW("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) in ROW($0, $0) @@ -2731,7 +2697,6 @@ fn front_sql_having_with_sq_segment_local_motion() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "sysFrom", "gr_expr_2"::unsigned -> "sys_op", sum(distinct ("gr_expr_3"::decimal))::decimal -> "sum", count(distinct ("gr_expr_3"::unsigned))::unsigned -> "count") having ROW("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) in ROW($0, $0) @@ -2757,7 +2722,6 @@ fn front_sql_unique_local_aggregates() { let plan = sql_to_optimized_ir(input, vec![]); - 
println!("{}", plan.as_explain().unwrap()); // here we must compute only two aggregates at local stage: sum(a), count(a) insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2", ROW(sum(("sum_1"::decimal))::decimal) + ROW(sum(("count_2"::unsigned))::unsigned) -> "col_3") @@ -2779,7 +2743,6 @@ fn front_sql_unique_local_groupings() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); // here we must compute only two groupby columns at local stage: a, b insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (sum(distinct ("gr_expr_2"::decimal))::decimal -> "col_1", count(distinct ("gr_expr_2"::unsigned))::unsigned -> "col_2", count(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_3") @@ -2838,7 +2801,6 @@ fn front_sql_select_distinct() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); // here we must compute only two groupby columns at local stage: a, b insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "a", "gr_expr_2"::unsigned -> "col_1") @@ -2859,7 +2821,6 @@ fn front_sql_select_distinct_asterisk() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("gr_expr_1"::unsigned -> "a", "gr_expr_2"::unsigned -> "b", "gr_expr_3"::unsigned -> "c", "gr_expr_4"::unsigned -> "d") group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned, "gr_expr_3"::unsigned, "gr_expr_4"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "gr_expr_3"::unsigned -> "gr_expr_3", "gr_expr_4"::unsigned -> "gr_expr_4") @@ -2896,7 +2857,6 @@ fn front_sql_select_distinct_with_aggr() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); 
insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (sum(("sum_1"::decimal))::decimal -> "col_1", "gr_expr_1"::unsigned -> "b") group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "sum_1"::decimal -> "sum_1") @@ -2916,7 +2876,6 @@ fn front_sql_select_distinct_with_aggr2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] @@ -3233,7 +3192,6 @@ fn front_sql_update6() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" update "t3" "b" = "col_0" @@ -3743,7 +3701,6 @@ fn front_subqueries_interpreted_as_expression_under_group_by() { let input = r#"SELECT COUNT(*) FROM "test_space" GROUP BY "id" + (VALUES (1))"#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (sum(("count_1"::unsigned))::unsigned -> "col_1") group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") @@ -3919,9 +3876,6 @@ fn front_sql_whitespaces_are_not_ignored() { for query in correct_queries { let res = ParseTree::parse(Rule::Command, query); - if res.is_err() { - println!("Query [{query}] is invalid.") - } assert!(res.is_ok()); } @@ -3932,9 +3886,6 @@ fn front_sql_whitespaces_are_not_ignored() { fixed.push_str(&query[..wp_idx]); fixed.push_str(&query[wp_idx + 1..]); let res = ParseTree::parse(Rule::Command, &fixed); - if res.is_ok() { - println!("Query [{fixed}] is valid.") - } assert!(res.is_err()) } } diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/coalesce.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/coalesce.rs index b2fd230860..96d404056c 100644 --- 
a/sbroad/sbroad-core/src/frontend/sql/ir/tests/coalesce.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/coalesce.rs @@ -1,5 +1,4 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; -use pretty_assertions::assert_eq; #[test] fn coalesce_in_projection() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/funcs.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/funcs.rs index 9f0cc48dc5..f229685e21 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/funcs.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/funcs.rs @@ -1,5 +1,4 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; -use pretty_assertions::assert_eq; #[test] fn lower_upper() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/insert.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/insert.rs index 03066c1aa2..e6191f9bce 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/insert.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/insert.rs @@ -1,6 +1,5 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; use crate::ir::value::Value; -use pretty_assertions::assert_eq; #[test] fn insert1() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/join.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/join.rs index d6aaf543ff..6f5d2f2c4f 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/join.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/join.rs @@ -1,5 +1,4 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; -use pretty_assertions::assert_eq; #[test] fn milti_join1() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs index 82874b855a..7a34cf10ca 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs @@ -1,5 +1,4 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; -use pretty_assertions::assert_eq; #[test] fn select() { diff --git 
a/sbroad/sbroad-core/src/frontend/sql/ir/tests/trim.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/trim.rs index 59f78d0384..d7033ccf06 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/trim.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/trim.rs @@ -1,5 +1,4 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; -use pretty_assertions::assert_eq; #[test] fn trim() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/union.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/union.rs index 19f7a853dd..8c6fd8ce03 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/union.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/union.rs @@ -1,5 +1,3 @@ -use pretty_assertions::assert_eq; - use crate::ir::transformation::helpers::sql_to_optimized_ir; #[test] diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/update.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/update.rs index fba5034221..4d27d91d4f 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/update.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/update.rs @@ -1,6 +1,5 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; use crate::ir::value::Value; -use pretty_assertions::assert_eq; #[test] fn update1() { diff --git a/sbroad/sbroad-core/src/ir/explain/tests.rs b/sbroad/sbroad-core/src/ir/explain/tests.rs index e4d957606a..503c893695 100644 --- a/sbroad/sbroad-core/src/ir/explain/tests.rs +++ b/sbroad/sbroad-core/src/ir/explain/tests.rs @@ -13,7 +13,6 @@ fn simple_query_without_cond_plan() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t"."identification_number"::integer -> "c1", "t"."product_code"::string -> "product_code") scan "hash_testing" -> "t" @@ -32,7 +31,6 @@ fn simple_query_with_cond_plan() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, 
*top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t"."identification_number"::integer -> "c1", "t"."product_code"::string -> "product_code") selection ROW("t"."identification_number"::integer) = ROW(1::unsigned) and ROW("t"."product_code"::string) = ROW('222'::string) @@ -82,7 +80,6 @@ WHERE "id" = 1"#; let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t"."id"::unsigned -> "id", "t"."FIRST_NAME"::string -> "FIRST_NAME") selection ROW("t"."id"::unsigned) = ROW(1::unsigned) @@ -121,7 +118,6 @@ WHERE "id" IN (SELECT "id" let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t"."id"::unsigned -> "id", "t"."FIRST_NAME"::string -> "FIRST_NAME") selection ROW("t"."id"::unsigned) in ROW($0) @@ -203,7 +199,6 @@ fn motion_subquery_plan() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t"."id"::unsigned -> "id", "t"."FIRST_NAME"::string -> "FIRST_NAME") selection ROW("t"."id"::unsigned) in ROW($1) or ROW("t"."id"::unsigned) in ROW($0) @@ -251,7 +246,6 @@ WHERE "t2"."product_code" = '123'"#; let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t1"."FIRST_NAME"::string -> "FIRST_NAME") selection ROW("t2"."product_code"::string) = ROW('123'::string) @@ -281,7 +275,6 @@ FROM (SELECT "id", "FIRST_NAME" FROM "test_space" WHERE "id" = 3) as "t1" let top = &plan.get_top().unwrap(); let explain_tree = 
FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t1"."FIRST_NAME"::string -> "FIRST_NAME") join on ROW("t1"."id"::unsigned) = ROW($0) @@ -313,7 +306,6 @@ fn unary_condition_plan() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("test_space"."id"::unsigned -> "id", "test_space"."FIRST_NAME"::string -> "FIRST_NAME") selection ROW("test_space"."id"::unsigned) is null and not ROW("test_space"."FIRST_NAME"::string) is null @@ -408,7 +400,6 @@ fn select_value_plan() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("COLUMN_1"::unsigned -> "COLUMN_1") scan @@ -429,7 +420,6 @@ fn select_cast_plan1() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("test_space"."id"::unsigned::unsigned -> "b") scan "test_space" @@ -448,7 +438,6 @@ fn select_cast_plan2() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("test_space"."id"::unsigned -> "id", "test_space"."FIRST_NAME"::string -> "FIRST_NAME") selection ROW("test_space"."id"::unsigned::int) = ROW(1::unsigned) @@ -468,7 +457,6 @@ fn select_cast_plan_nested() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("func"(("test_space"."id"::unsigned))::integer::string 
-> "col_1") scan "test_space" diff --git a/sbroad/sbroad-core/src/ir/explain/tests/cast_constants.rs b/sbroad/sbroad-core/src/ir/explain/tests/cast_constants.rs index e31bb9e321..80ae25a543 100644 --- a/sbroad/sbroad-core/src/ir/explain/tests/cast_constants.rs +++ b/sbroad/sbroad-core/src/ir/explain/tests/cast_constants.rs @@ -1,5 +1,4 @@ use crate::executor::{engine::mock::RouterRuntimeMock, Query}; -use pretty_assertions::assert_eq; #[test] fn select_values_rows() { diff --git a/sbroad/sbroad-core/src/ir/explain/tests/concat.rs b/sbroad/sbroad-core/src/ir/explain/tests/concat.rs index a6d5547b7d..7bac576cdb 100644 --- a/sbroad/sbroad-core/src/ir/explain/tests/concat.rs +++ b/sbroad/sbroad-core/src/ir/explain/tests/concat.rs @@ -4,8 +4,6 @@ use super::*; fn concat1_test() { let sql = r#"SELECT CAST('1' as string) || 'hello' FROM "t1""#; let plan = sql_to_optimized_ir(sql, vec![]); - let top = &plan.get_top().unwrap(); - let explain_tree = FullExplain::new(&plan, *top).unwrap(); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (ROW('1'::string) || ROW('hello'::string) -> "col_1") scan "t1" @@ -19,8 +17,6 @@ fn concat1_test() { fn concat2_test() { let sql = r#"SELECT "a" FROM "t1" WHERE CAST('1' as string) || FUNC('hello') || '2' = 42"#; let plan = sql_to_optimized_ir(sql, vec![]); - let top = &plan.get_top().unwrap(); - let explain_tree = FullExplain::new(&plan, *top).unwrap(); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("t1"."a"::string -> "a") selection ROW(ROW(ROW('1'::string) || ROW("func"(('hello'::string))::integer)) || ROW('2'::string)) = ROW(42::unsigned) diff --git a/sbroad/sbroad-core/src/ir/explain/tests/delete.rs b/sbroad/sbroad-core/src/ir/explain/tests/delete.rs index 49a2766ae5..b18756ab08 100644 --- a/sbroad/sbroad-core/src/ir/explain/tests/delete.rs +++ b/sbroad/sbroad-core/src/ir/explain/tests/delete.rs @@ -4,8 +4,6 @@ use super::*; fn delete1_test() { let sql = r#"DELETE FROM "t1""#; let plan = 
sql_to_optimized_ir(sql, vec![]); - let top = &plan.get_top().unwrap(); - let explain_tree = FullExplain::new(&plan, *top).unwrap(); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" delete "t1" execution options: @@ -18,8 +16,6 @@ fn delete1_test() { fn delete2_test() { let sql = r#"DELETE FROM "t1" where "a" > 3"#; let plan = sql_to_optimized_ir(sql, vec![]); - let top = &plan.get_top().unwrap(); - let explain_tree = FullExplain::new(&plan, *top).unwrap(); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" delete "t1" motion [policy: local] @@ -36,8 +32,6 @@ fn delete2_test() { fn delete3_test() { let sql = r#"DELETE FROM "t1" where "a" in (SELECT "b" from "t1")"#; let plan = sql_to_optimized_ir(sql, vec![]); - let top = &plan.get_top().unwrap(); - let explain_tree = FullExplain::new(&plan, *top).unwrap(); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" delete "t1" motion [policy: local] diff --git a/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs b/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs index 5b3ee71b9f..d56b5460c1 100644 --- a/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs +++ b/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs @@ -1,6 +1,3 @@ -use pretty_assertions::assert_eq; -use smol_str::ToSmolStr; - use crate::executor::{engine::mock::RouterRuntimeMock, Query}; #[test] -- GitLab From d9cd756ddb8f7a6e802fb6027b9edb98c78262a8 Mon Sep 17 00:00:00 2001 From: EmirVildanov <reddog201030@gmail.com> Date: Tue, 25 Mar 2025 13:02:06 +0300 Subject: [PATCH 4/4] fix: handle grouping expressions without parent, add test for such case --- sbroad/sbroad-core/src/ir/aggregates.rs | 12 ++--- .../transformation/redistribution/groupby.rs | 44 ++++++++++++------- test/int/sql/groupby.sql | 13 +++++- 3 files changed, 46 insertions(+), 23 deletions(-) diff --git a/sbroad/sbroad-core/src/ir/aggregates.rs b/sbroad/sbroad-core/src/ir/aggregates.rs index 326c6b46be..df7170fe75 100644 --- 
a/sbroad/sbroad-core/src/ir/aggregates.rs +++ b/sbroad/sbroad-core/src/ir/aggregates.rs @@ -481,7 +481,7 @@ struct AggregateSignature<'plan> { pub local_alias: Rc<String>, } -impl<'plan> Hash for AggregateSignature<'plan> { +impl Hash for AggregateSignature<'_> { fn hash<H: Hasher>(&self, state: &mut H) { self.kind.hash(state); let mut comp = Comparator::new(self.plan); @@ -492,7 +492,7 @@ impl<'plan> Hash for AggregateSignature<'plan> { } } -impl<'plan> PartialEq<Self> for AggregateSignature<'plan> { +impl PartialEq<Self> for AggregateSignature<'_> { fn eq(&self, other: &Self) -> bool { let comparator = Comparator::new(self.plan); self.kind == other.kind @@ -504,7 +504,7 @@ impl<'plan> PartialEq<Self> for AggregateSignature<'plan> { } } -impl<'plan> Eq for AggregateSignature<'plan> {} +impl Eq for AggregateSignature<'_> {} fn aggr_local_alias(kind: AggregateKind, index: usize) -> String { format!("{kind}_{index}") @@ -517,9 +517,9 @@ impl Plan { /// TODO: We should also support OrderBy. /// /// # Arguments - /// [`finals`] - ids of nodes in final (reduce stage) before adding two stage aggregation. - /// It may contain ids of `Projection`, `Having` or `NamedWindows`. - /// Note: final `GroupBy` is not present because it will be added later in 2-stage pipeline. + /// * `finals` - ids of nodes in final (reduce stage) before adding two stage aggregation. + /// It may contain ids of `Projection`, `Having` or `NamedWindows`. + /// Note: final `GroupBy` is not present because it will be added later in 2-stage pipeline. 
pub fn collect_aggregates(&self, finals: &Vec<NodeId>) -> Result<Vec<Aggregate>, SbroadError> { let mut aggrs = Vec::with_capacity(AGGR_CAPACITY); for node_id in finals { diff --git a/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs b/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs index 69899d3582..81f1576104 100644 --- a/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs +++ b/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs @@ -1,3 +1,4 @@ +use ahash::AHashMap; use smol_str::{format_smolstr, ToSmolStr}; use crate::errors::{Entity, SbroadError}; @@ -7,7 +8,7 @@ use crate::ir::aggregates::Aggregate; use crate::ir::distribution::Distribution; use crate::ir::expression::{ColumnPositionMap, Comparator, EXPR_HASH_DEPTH}; use crate::ir::node::expression::Expression; -use crate::ir::node::relational::Relational; +use crate::ir::node::relational::{MutRelational, Relational}; use crate::ir::node::{Alias, ArenaType, GroupBy, Having, NodeId, Projection, Reference}; use crate::ir::transformation::redistribution::{ MotionKey, MotionPolicy, Program, Strategy, Target, @@ -37,13 +38,13 @@ struct ExpressionLocationId { /// Id of grouping expression. pub expr_id: NodeId, /// Id of expression which is a parent of `expr`. - pub parent_expr_id: NodeId, + pub parent_expr_id: Option<NodeId>, /// Relational node in which this `expr` is used. 
pub rel_id: NodeId, } impl ExpressionLocationId { - pub fn new(expr_id: NodeId, parent_expr_id: NodeId, rel_id: NodeId) -> Self { + pub fn new(expr_id: NodeId, parent_expr_id: Option<NodeId>, rel_id: NodeId) -> Self { ExpressionLocationId { parent_expr_id, expr_id, @@ -82,7 +83,7 @@ impl PartialEq for GroupingExpression<'_> { } } -impl<'plan> Eq for GroupingExpression<'plan> {} +impl Eq for GroupingExpression<'_> {} /// Maps id of `GroupBy` expression used in `GroupBy` (from local stage) /// to list of locations where this expression is used in other relational @@ -97,7 +98,7 @@ impl<'plan> Eq for GroupingExpression<'plan> {} /// In case there is a reference (or expression containing it) in the final relational operator /// that doesn't correspond to any GroupBy expression, an error should have been thrown on the /// stage of `collect_grouping_expressions`. -type GroupbyExpressionsMap = HashMap<NodeId, Vec<ExpressionLocationId>>; +type GroupbyExpressionsMap = AHashMap<NodeId, Vec<ExpressionLocationId>>; /// Maps id of `GroupBy` expression used in `GroupBy` (from local stage) /// to corresponding local alias used in local Projection. 
Note: @@ -174,12 +175,6 @@ impl<'plan> ExpressionMapper<'plan> { }) .copied() { - let parent_expr = parent_expr.unwrap_or_else(|| { - panic!( - "parent expression for grouping expression under {:?} rel node should be found", - self.rel_id - ) - }); let location = ExpressionLocationId::new(current, parent_expr, self.rel_id); if let Some(v) = self.map.get_mut(&gr_expr) { v.push(location); @@ -269,7 +264,7 @@ impl GroupByInfo { Self { id, grouping_exprs: Vec::with_capacity(GR_EXPR_CAPACITY), - gr_exprs_map: HashMap::with_capacity(GR_EXPR_CAPACITY), + gr_exprs_map: AHashMap::with_capacity(GR_EXPR_CAPACITY), grouping_expr_to_alias_map: OrderedMap::with_hasher(RepeatableState), reduce_info: GroupByReduceInfo::new(), } @@ -814,14 +809,13 @@ impl Plan { &mut self, groupby_info: &GroupByInfo, ) -> Result<(), SbroadError> { - println!("Enter patch_grouping_expressions"); let local_aliases_map = &groupby_info.reduce_info.local_aliases_map; let gr_exprs_map = &groupby_info.gr_exprs_map; type RelationalID = NodeId; type GroupByExpressionID = NodeId; type ExpressionID = NodeId; - type ExpressionParent = NodeId; + type ExpressionParent = Option<NodeId>; // Map of { Relation -> vec![( // expr_id under group by // expr_id of the same expr under other relation (e.g. Projection) @@ -853,7 +847,6 @@ impl Plan { "expected relation node ({rel_id:?}) to have children!" )) })?; - println!("Before call ColumnPositionMap::new"); let alias_to_pos_map = ColumnPositionMap::new(self, child_id)?; let mut nodes = Vec::with_capacity(group.len()); for (gr_expr_id, expr_id, parent_expr_id) in group { @@ -888,7 +881,26 @@ impl Plan { } for (parent_expr_id, expr_id, node) in nodes { let ref_id = self.nodes.push(node.into()); - self.replace_expression(parent_expr_id, expr_id, ref_id)?; + if let Some(parent_expr_id) = parent_expr_id { + self.replace_expression(parent_expr_id, expr_id, ref_id)?; + } else { + // Grouping expression doesn't have parent grouping expression. 
+ let rel_node = self.get_mut_relation_node(rel_id)?; + match rel_node { + MutRelational::Having(Having { filter, .. }) => { + // E.g. `select a from t group by a having a`. + if *filter == expr_id { + *filter = ref_id; + } + } + _ => { + // Currently Having is the only relational node in which grouping expression + // can not have a parent expression (under Projection all expressions are covered + // with output Row node). + panic!("Unexpected final node met for expression replacement: {rel_node:?}") + } + } + } } } Ok(()) diff --git a/test/int/sql/groupby.sql b/test/int/sql/groupby.sql index 6fc51c77c2..5213986b85 100644 --- a/test/int/sql/groupby.sql +++ b/test/int/sql/groupby.sql @@ -6,6 +6,11 @@ INSERT INTO t VALUES(1, 1); INSERT INTO t VALUES(2, 1); INSERT INTO t VALUES(3, 2); INSERT INTO t VALUES(4, 3); +DROP TABLE IF EXISTS tb; +CREATE TABLE tb(a INT PRIMARY KEY, b BOOLEAN); +INSERT INTO tb VALUES(1, true); +INSERT INTO tb VALUES(2, true); +INSERT INTO tb VALUES(3, false); -- TEST: reference-under-case-expression -- SQL: @@ -41,4 +46,10 @@ SELECT CASE WHEN a <= 4 THEN 42 END AS c FROM t ORDER BY c; 42, 42, 42, -42 \ No newline at end of file +42 + +-- TEST: having-with-boolean-column +-- SQL: +SELECT sum(a) FROM tb GROUP BY b HAVING b; +-- EXPECTED: +3 \ No newline at end of file -- GitLab