diff --git a/sbroad/sbroad-cartridge/test_app/test/integration/groupby_test.lua b/sbroad/sbroad-cartridge/test_app/test/integration/groupby_test.lua index 3891dd00ad51e991043265492756ab0836c55853..75c4297bd51c9da82e4326ba645687516b00f0e5 100644 --- a/sbroad/sbroad-cartridge/test_app/test/integration/groupby_test.lua +++ b/sbroad/sbroad-cartridge/test_app/test/integration/groupby_test.lua @@ -881,7 +881,7 @@ groupby_queries.test_aggr_invalid = function() local _, err = api:call("sbroad.execute", { [[ SELECT "d", count(sum("e")) from "arithmetic_space" group by "d"]], {} }) - t.assert_str_contains(tostring(err), "aggregate function inside aggregate function") + t.assert_str_contains(tostring(err), "aggregate functions inside aggregate function") end groupby_queries.test_groupby_arith_expression = function() diff --git a/sbroad/sbroad-core/src/errors.rs b/sbroad/sbroad-core/src/errors.rs index 59a1bf2836f7705cdf7670364b48db53c3920ae1..98d43c2c57febcac16086347f1fb1b1977007170 100644 --- a/sbroad/sbroad-core/src/errors.rs +++ b/sbroad/sbroad-core/src/errors.rs @@ -400,20 +400,20 @@ impl std::error::Error for SbroadError {} impl<E: fmt::Debug> From<TransactionError<E>> for SbroadError { fn from(error: TransactionError<E>) -> Self { - SbroadError::FailedTo( - Action::Create, - Some(Entity::Transaction), - format_smolstr!("{error:?}"), + SbroadError::Invalid( + Entity::Transaction, + Some(format_smolstr!("Transaction error occurred: {error:?}")), ) } } impl From<Error> for SbroadError { fn from(error: Error) -> Self { - SbroadError::FailedTo( - Action::Create, - Some(Entity::Tarantool), - format_smolstr!("{error:?}"), + SbroadError::Invalid( + Entity::Tarantool, + Some(format_smolstr!( + "Tarantool module error occurred: {error:?}" + )), ) } } diff --git a/sbroad/sbroad-core/src/executor/tests.rs b/sbroad/sbroad-core/src/executor/tests.rs index 3f7c3337e3271769732dc1d39c1fb723db3ae4f5..61eda6879a92867b127ace27f890054bb98af90f 100644 --- 
a/sbroad/sbroad-core/src/executor/tests.rs +++ b/sbroad/sbroad-core/src/executor/tests.rs @@ -894,9 +894,9 @@ fn groupby_linker_test() { LuaValue::String(String::from(PatternWithParams::new( format!( "{} {} {}", - r#"SELECT "column_596" as "ii" FROM"#, + r#"SELECT "gr_expr_1" as "ii" FROM"#, r#"(SELECT "COL_1" FROM "TMP_test_0136")"#, - r#"GROUP BY "column_596""#, + r#"GROUP BY "gr_expr_1""#, ), vec![], ))), diff --git a/sbroad/sbroad-core/src/executor/tests/exec_plan.rs b/sbroad/sbroad-core/src/executor/tests/exec_plan.rs index 751d24a4f771f882827652dcb9f7a8fde43898bf..3f6b0afaf44bb354c9999a7e457a4a7481e60341 100644 --- a/sbroad/sbroad-core/src/executor/tests/exec_plan.rs +++ b/sbroad/sbroad-core/src/executor/tests/exec_plan.rs @@ -112,7 +112,7 @@ fn exec_plan_subtree_two_stage_groupby_test() { assert_eq!( sql, PatternWithParams::new( - r#"SELECT "T1"."FIRST_NAME" as "column_596" FROM "test_space" as "T1" GROUP BY "T1"."FIRST_NAME""# + r#"SELECT "T1"."FIRST_NAME" as "gr_expr_1" FROM "test_space" as "T1" GROUP BY "T1"."FIRST_NAME""# .to_string(), vec![] ) @@ -159,9 +159,9 @@ fn exec_plan_subtree_two_stage_groupby_test_2() { sql, PatternWithParams::new( f_sql( - r#"SELECT "T1"."FIRST_NAME" as "column_596", -"T1"."sys_op" as "column_696", -"T1"."sysFrom" as "column_796" + r#"SELECT "T1"."FIRST_NAME" as "gr_expr_1", +"T1"."sys_op" as "gr_expr_2", +"T1"."sysFrom" as "gr_expr_3" FROM "test_space" as "T1" GROUP BY "T1"."FIRST_NAME", "T1"."sys_op", "T1"."sysFrom""# ), @@ -223,7 +223,7 @@ fn exec_plan_subtree_aggregates() { panic!("Expected MotionPolicy::Segment for local aggregation stage"); }; assert_eq!(sql.params, vec![Value::from("o")]); - insta::assert_snapshot!(sql.pattern, @r#"SELECT "T1"."sys_op" as "column_596", ("T1"."id") * ("T1"."sys_op") as "column_1632", "T1"."id" as "column_2096", count ("T1"."sysFrom") as "count_1596", sum ("T1"."id") as "sum_1796", count ("T1"."id") as "count_2696", min ("T1"."id") as "min_3096", group_concat ("T1"."FIRST_NAME", ?) 
as "group_concat_2496", total ("T1"."id") as "total_2896", max ("T1"."id") as "max_3296" FROM "test_space" as "T1" GROUP BY "T1"."sys_op", ("T1"."id") * ("T1"."sys_op"), "T1"."id""#); + insta::assert_snapshot!(sql.pattern, @r#"SELECT "T1"."sys_op" as "gr_expr_1", ("T1"."id") * ("T1"."sys_op") as "gr_expr_2", "T1"."id" as "gr_expr_3", count ("T1"."sysFrom") as "count_1", sum ("T1"."id") as "sum_2", count ("T1"."id") as "avg_4", min ("T1"."id") as "min_6", group_concat ("T1"."FIRST_NAME", ?) as "group_concat_3", total ("T1"."id") as "total_5", max ("T1"."id") as "max_7" FROM "test_space" as "T1" GROUP BY "T1"."sys_op", ("T1"."id") * ("T1"."sys_op"), "T1"."id""#); // Check main query let sql = get_sql_from_execution_plan(exec_plan, top_id, Snapshot::Oldest, TEMPLATE); @@ -260,7 +260,7 @@ fn exec_plan_subtree_aggregates_no_groupby() { assert_eq!( sql, PatternWithParams::new( - r#"SELECT ("T1"."id") + ("T1"."sysFrom") as "column_632", count ("T1"."sysFrom") as "count_696" FROM "test_space" as "T1" GROUP BY ("T1"."id") + ("T1"."sysFrom")"#.to_string(), + r#"SELECT ("T1"."id") + ("T1"."sysFrom") as "gr_expr_1", count ("T1"."sysFrom") as "count_1" FROM "test_space" as "T1" GROUP BY ("T1"."id") + ("T1"."sysFrom")"#.to_string(), vec![] )); @@ -419,7 +419,7 @@ fn exec_plan_subtree_count_asterisk() { assert_eq!( sql, PatternWithParams::new( - r#"SELECT count (*) as "count_596" FROM "test_space""#.to_string(), + r#"SELECT count (*) as "count_1" FROM "test_space""#.to_string(), vec![] ) ); @@ -473,8 +473,8 @@ fn exec_plan_subtree_having() { PatternWithParams::new( format!( "{} {} {}", - r#"SELECT "T1"."sys_op" as "column_596", ("T1"."sys_op") * (?) as "column_2032","#, - r#"count (("T1"."sys_op") * (?)) as "count_2296" FROM "test_space" as "T1""#, + r#"SELECT "T1"."sys_op" as "gr_expr_1", ("T1"."sys_op") * (?) 
as "gr_expr_2","#, + r#"count (("T1"."sys_op") * (?)) as "count_1" FROM "test_space" as "T1""#, r#"GROUP BY "T1"."sys_op", ("T1"."sys_op") * (?)"#, ), vec![Value::Unsigned(2), Value::Unsigned(2), Value::Unsigned(2)] @@ -536,8 +536,8 @@ fn exec_plan_subtree_having_without_groupby() { PatternWithParams::new( format!( "{} {} {}", - r#"SELECT ("T1"."sys_op") * (?) as "column_1332","#, - r#"count (("T1"."sys_op") * (?)) as "count_1496" FROM "test_space" as "T1""#, + r#"SELECT ("T1"."sys_op") * (?) as "gr_expr_1","#, + r#"count (("T1"."sys_op") * (?)) as "count_1" FROM "test_space" as "T1""#, r#"GROUP BY ("T1"."sys_op") * (?)"#, ), vec![Value::Unsigned(2), Value::Unsigned(2), Value::Unsigned(2)] @@ -699,7 +699,7 @@ fn exec_plan_subquery_as_expression_under_group_by() { assert_eq!( sql, PatternWithParams::new( - r#"SELECT ("test_space"."id") + (VALUES (?)) as "column_932", count (*) as "count_1496" FROM "test_space" GROUP BY ("test_space"."id") + (VALUES (?))"#.to_string(), + r#"SELECT ("test_space"."id") + (VALUES (?)) as "gr_expr_1", count (*) as "count_1" FROM "test_space" GROUP BY ("test_space"."id") + (VALUES (?))"#.to_string(), vec![Value::Unsigned(1u64), Value::Unsigned(1u64)] ) ); @@ -709,7 +709,7 @@ fn exec_plan_subquery_as_expression_under_group_by() { assert_eq!( sql, PatternWithParams::new( - r#"SELECT sum ("count_1496") as "col_1" FROM (SELECT "COL_1" FROM "TMP_test_0136") GROUP BY "COL_1""#.to_string(), + r#"SELECT sum ("count_1") as "col_1" FROM (SELECT "COL_1" FROM "TMP_test_0136") GROUP BY "COL_1""#.to_string(), vec![] ) ); diff --git a/sbroad/sbroad-core/src/executor/tests/frontend.rs b/sbroad/sbroad-core/src/executor/tests/frontend.rs index b57745298cb297ee5b98d336d7b4f2b1b01c241c..7d8a6ce66666d60e7e0ceb2326e272708dfde58f 100644 --- a/sbroad/sbroad-core/src/executor/tests/frontend.rs +++ b/sbroad/sbroad-core/src/executor/tests/frontend.rs @@ -2,7 +2,6 @@ use super::*; use crate::executor::engine::mock::RouterRuntimeMock; use 
pretty_assertions::assert_eq; -use smol_str::format_smolstr; #[test] fn front_valid_sql1() { diff --git a/sbroad/sbroad-core/src/frontend/sql.rs b/sbroad/sbroad-core/src/frontend/sql.rs index 3763ad179c352051798dab70b19bd4dd043cdf61..29639c95bab0ef6919e7a3319b10e1747db7f8fb 100644 --- a/sbroad/sbroad-core/src/frontend/sql.rs +++ b/sbroad/sbroad-core/src/frontend/sql.rs @@ -2471,7 +2471,7 @@ impl ParseExpression { let arg_plan_id = arg.populate_plan(plan, worker)?; plan_arg_ids.push(arg_plan_id); } - if let Some(kind) = AggregateKind::new(name) { + if let Some(kind) = AggregateKind::from_name(name) { plan.add_aggregate_function(name, kind, plan_arg_ids, is_distinct)? } else if is_distinct { return Err(SbroadError::Invalid( diff --git a/sbroad/sbroad-core/src/frontend/sql/ir.rs b/sbroad/sbroad-core/src/frontend/sql/ir.rs index a5d233d13233dfacfa15a2d6c912bac49fbfa187..1811e2a42cb4050cb6758c1f3156b02c131d98ff 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir.rs @@ -450,7 +450,6 @@ impl SubtreeCloner { children: _, gr_exprs, output: _, - is_final: _, }) => { *gr_exprs = self.copy_list(gr_exprs)?; for expr_id in gr_exprs.iter() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs index d4e92d2adc92d5bd60bf0882884e1e2ad45250ac..2b2993c6fc340ff36a563ca4341a534730ed61c2 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs @@ -845,9 +845,9 @@ fn front_order_by_over_single_distribution_must_not_add_motion() { scan projection ("id_count"::unsigned -> "id_count") scan - projection (sum(("count_696"::unsigned))::unsigned -> "id_count") + projection (sum(("count_1"::unsigned))::unsigned -> "id_count") motion [policy: full] - projection (count(("test_space"."id"::unsigned))::unsigned -> "count_696") + projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1") scan "test_space" execution options: 
sql_vdbe_opcode_max = 45000 @@ -1296,12 +1296,11 @@ fn front_sql_groupby() { let input = r#"SELECT "identification_number", "product_code" FROM "hash_testing" group by "identification_number", "product_code""#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::integer -> "identification_number", "column_696"::string -> "product_code") - group by ("column_596"::integer, "column_696"::string) output: ("column_596"::integer -> "column_596", "column_696"::string -> "column_696") - motion [policy: segment([ref("column_596"), ref("column_696")])] - projection ("hash_testing"."identification_number"::integer -> "column_596", "hash_testing"."product_code"::string -> "column_696") + projection ("gr_expr_1"::integer -> "identification_number", "gr_expr_2"::string -> "product_code") + group by ("gr_expr_1"::integer, "gr_expr_2"::string) output: ("gr_expr_1"::integer -> "gr_expr_1", "gr_expr_2"::string -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("hash_testing"."identification_number"::integer -> "gr_expr_1", "hash_testing"."product_code"::string -> "gr_expr_2") group by ("hash_testing"."identification_number"::integer, "hash_testing"."product_code"::string) output: ("hash_testing"."identification_number"::integer -> "identification_number", "hash_testing"."product_code"::string -> "product_code", "hash_testing"."product_units"::boolean -> "product_units", "hash_testing"."sys_op"::unsigned -> "sys_op", "hash_testing"."bucket_id"::unsigned -> "bucket_id") scan "hash_testing" execution options: @@ -1319,12 +1318,11 @@ fn front_sql_groupby_less_cols_in_proj() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::integer -> "identification_number") - group by ("column_596"::integer, 
"column_696"::boolean) output: ("column_596"::integer -> "column_596", "column_696"::boolean -> "column_696") - motion [policy: segment([ref("column_596"), ref("column_696")])] - projection ("hash_testing"."identification_number"::integer -> "column_596", "hash_testing"."product_units"::boolean -> "column_696") + projection ("gr_expr_1"::integer -> "identification_number") + group by ("gr_expr_1"::integer, "gr_expr_2"::boolean) output: ("gr_expr_1"::integer -> "gr_expr_1", "gr_expr_2"::boolean -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("hash_testing"."identification_number"::integer -> "gr_expr_1", "hash_testing"."product_units"::boolean -> "gr_expr_2") group by ("hash_testing"."identification_number"::integer, "hash_testing"."product_units"::boolean) output: ("hash_testing"."identification_number"::integer -> "identification_number", "hash_testing"."product_code"::string -> "product_code", "hash_testing"."product_units"::boolean -> "product_units", "hash_testing"."sys_op"::unsigned -> "sys_op", "hash_testing"."bucket_id"::unsigned -> "bucket_id") scan "hash_testing" execution options: @@ -1344,10 +1342,10 @@ fn front_sql_groupby_union_1() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all - projection ("column_596"::integer -> "identification_number") - group by ("column_596"::integer) output: ("column_596"::integer -> "column_596") - motion [policy: segment([ref("column_596")])] - projection ("hash_testing"."identification_number"::integer -> "column_596") + projection ("gr_expr_1"::integer -> "identification_number") + group by ("gr_expr_1"::integer) output: ("gr_expr_1"::integer -> "gr_expr_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("hash_testing"."identification_number"::integer -> "gr_expr_1") group by ("hash_testing"."identification_number"::integer) output: ("hash_testing"."identification_number"::integer -> "identification_number", 
"hash_testing"."product_code"::string -> "product_code", "hash_testing"."product_units"::boolean -> "product_units", "hash_testing"."sys_op"::unsigned -> "sys_op", "hash_testing"."bucket_id"::unsigned -> "bucket_id") scan "hash_testing" projection ("hash_testing"."identification_number"::integer -> "identification_number") @@ -1368,7 +1366,6 @@ fn front_sql_groupby_union_2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all projection ("hash_testing"."identification_number"::integer -> "identification_number") @@ -1376,10 +1373,10 @@ fn front_sql_groupby_union_2() { projection ("identification_number"::integer -> "identification_number") scan union all - projection ("column_1196"::integer -> "identification_number") - group by ("column_1196"::integer) output: ("column_1196"::integer -> "column_1196") - motion [policy: segment([ref("column_1196")])] - projection ("hash_testing"."identification_number"::integer -> "column_1196") + projection ("gr_expr_1"::integer -> "identification_number") + group by ("gr_expr_1"::integer) output: ("gr_expr_1"::integer -> "gr_expr_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("hash_testing"."identification_number"::integer -> "gr_expr_1") group by ("hash_testing"."identification_number"::integer) output: ("hash_testing"."identification_number"::integer -> "identification_number", "hash_testing"."product_code"::string -> "product_code", "hash_testing"."product_units"::boolean -> "product_units", "hash_testing"."sys_op"::unsigned -> "sys_op", "hash_testing"."bucket_id"::unsigned -> "bucket_id") scan "hash_testing" projection ("hash_testing"."identification_number"::integer -> "identification_number") @@ -1400,12 +1397,11 @@ fn front_sql_groupby_join_1() { "#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" 
- projection ("column_2496"::string -> "product_code", "column_2596"::boolean -> "product_units") - group by ("column_2496"::string, "column_2596"::boolean) output: ("column_2496"::string -> "column_2496", "column_2596"::boolean -> "column_2596") - motion [policy: segment([ref("column_2496"), ref("column_2596")])] - projection ("t2"."product_code"::string -> "column_2496", "t2"."product_units"::boolean -> "column_2596") + projection ("gr_expr_1"::string -> "product_code", "gr_expr_2"::boolean -> "product_units") + group by ("gr_expr_1"::string, "gr_expr_2"::boolean) output: ("gr_expr_1"::string -> "gr_expr_1", "gr_expr_2"::boolean -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("t2"."product_code"::string -> "gr_expr_1", "t2"."product_units"::boolean -> "gr_expr_2") group by ("t2"."product_code"::string, "t2"."product_units"::boolean) output: ("t2"."product_units"::boolean -> "product_units", "t2"."product_code"::string -> "product_code", "t2"."identification_number"::integer -> "identification_number", "t"."id"::unsigned -> "id") join on ROW("t2"."identification_number"::integer) = ROW("t"."id"::unsigned) scan "t2" @@ -1475,7 +1471,6 @@ fn front_sql_join() { "#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); // TODO: For the hash function in the cartrisge runtime we can apply // `motion [policy: segment([ref("id")])]` instead of the `motion [policy: full]`. 
@@ -1487,9 +1482,9 @@ fn front_sql_join() { scan "hash_single_testing" motion [policy: full] scan "t2" - projection (sum(("sum_1796"::decimal))::decimal -> "id") + projection (sum(("sum_1"::decimal))::decimal -> "id") motion [policy: full] - projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_1796") + projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 @@ -1503,14 +1498,13 @@ fn front_sql_groupby_insert() { SELECT "b", "d" FROM "t" group by "b", "d" ON CONFLICT DO FAIL"#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" insert "t" on conflict: fail motion [policy: segment([value(NULL), ref("d")])] - projection ("column_596"::unsigned -> "b", "column_696"::unsigned -> "d") - group by ("column_596"::unsigned, "column_696"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_696"::unsigned -> "column_696") - motion [policy: segment([ref("column_596"), ref("column_696")])] - projection ("t"."b"::unsigned -> "column_596", "t"."d"::unsigned -> "column_696") + projection ("gr_expr_1"::unsigned -> "b", "gr_expr_2"::unsigned -> "d") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("t"."b"::unsigned -> "gr_expr_1", "t"."d"::unsigned -> "gr_expr_2") group by ("t"."b"::unsigned, "t"."d"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1553,10 +1547,10 @@ fn front_sql_aggregates() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "b", 
ROW(sum(("count_1496"::unsigned))::unsigned) + ROW(sum(("count_1596"::unsigned))::unsigned) -> "col_1") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "count_1596"::unsigned -> "count_1596", "count_1496"::unsigned -> "count_1496") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", count(("t"."b"::unsigned))::unsigned -> "count_1596", count(("t"."a"::unsigned))::unsigned -> "count_1496") + projection ("gr_expr_1"::unsigned -> "b", ROW(sum(("count_1"::unsigned))::unsigned) + ROW(sum(("count_2"::unsigned))::unsigned) -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_2"::unsigned -> "count_2", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", count(("t"."b"::unsigned))::unsigned -> "count_2", count(("t"."a"::unsigned))::unsigned -> "count_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1571,10 +1565,10 @@ fn front_sql_distinct_asterisk() { join (select "id" from "test_space") on true"#; let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_1996"::unsigned -> "id", "column_2096"::unsigned -> "id") - group by ("column_1996"::unsigned, "column_2096"::unsigned) output: ("column_1996"::unsigned -> "column_1996", "column_2096"::unsigned -> "column_2096") - motion [policy: segment([ref("column_1996"), ref("column_2096")])] - projection ("id"::unsigned -> "column_1996", "id"::unsigned -> "column_2096") + projection ("gr_expr_1"::unsigned -> "id", "gr_expr_2"::unsigned -> "id") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion 
[policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("id"::unsigned -> "gr_expr_1", "id"::unsigned -> "gr_expr_2") group by ("id"::unsigned, "id"::unsigned) output: ("id"::unsigned -> "id", "id"::unsigned -> "id") join on true::boolean scan @@ -1597,9 +1591,9 @@ fn front_sql_avg_aggregate() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_696"::decimal::double))::decimal / sum(("count_696"::decimal::double))::decimal -> "col_1", avg(distinct ("column_796"::decimal::double))::decimal -> "col_2", ROW(sum(("sum_696"::decimal::double))::decimal / sum(("count_696"::decimal::double))::decimal) * ROW(sum(("sum_696"::decimal::double))::decimal / sum(("count_696"::decimal::double))::decimal) -> "col_3") + projection (sum(("avg_1"::decimal::double))::decimal / sum(("avg_2"::decimal::double))::decimal -> "col_1", avg(distinct ("gr_expr_1"::decimal::double))::decimal -> "col_2", ROW(sum(("avg_1"::decimal::double))::decimal / sum(("avg_2"::decimal::double))::decimal) * ROW(sum(("avg_1"::decimal::double))::decimal / sum(("avg_2"::decimal::double))::decimal) -> "col_3") motion [policy: full] - projection ("t"."b"::unsigned -> "column_796", count(("t"."b"::unsigned))::unsigned -> "count_696", sum(("t"."b"::unsigned))::decimal -> "sum_696") + projection ("t"."b"::unsigned -> "gr_expr_1", count(("t"."b"::unsigned))::unsigned -> "avg_2", sum(("t"."b"::unsigned))::decimal -> "avg_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1614,11 +1608,10 @@ fn front_sql_total_aggregate() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (total(("total_696"::double))::double -> "col_1", total(distinct 
("column_796"::double))::double -> "col_2") + projection (total(("total_1"::double))::double -> "col_1", total(distinct ("gr_expr_1"::double))::double -> "col_2") motion [policy: full] - projection ("t"."b"::unsigned -> "column_796", total(("t"."b"::unsigned))::double -> "total_696") + projection ("t"."b"::unsigned -> "gr_expr_1", total(("t"."b"::unsigned))::double -> "total_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1633,11 +1626,10 @@ fn front_sql_min_aggregate() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (min(("min_696"::unsigned))::unsigned -> "col_1", min(distinct ("column_796"::unsigned))::unsigned -> "col_2") + projection (min(("min_1"::unsigned))::unsigned -> "col_1", min(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection ("t"."b"::unsigned -> "column_796", min(("t"."b"::unsigned))::unsigned -> "min_696") + projection ("t"."b"::unsigned -> "gr_expr_1", min(("t"."b"::unsigned))::unsigned -> "min_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1652,11 +1644,10 @@ fn front_sql_max_aggregate() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (max(("max_696"::unsigned))::unsigned -> "col_1", max(distinct ("column_796"::unsigned))::unsigned -> "col_2") + projection (max(("max_1"::unsigned))::unsigned -> "col_1", max(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection ("t"."b"::unsigned -> "column_796", 
max(("t"."b"::unsigned))::unsigned -> "max_696") + projection ("t"."b"::unsigned -> "gr_expr_1", max(("t"."b"::unsigned))::unsigned -> "max_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1671,11 +1662,10 @@ fn front_sql_group_concat_aggregate() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (group_concat(("group_concat_696"::string))::string -> "col_1", group_concat(distinct ("column_796"::string))::string -> "col_2") + projection (group_concat(("group_concat_1"::string))::string -> "col_1", group_concat(distinct ("gr_expr_1"::string))::string -> "col_2") motion [policy: full] - projection ("test_space"."FIRST_NAME"::string -> "column_796", group_concat(("test_space"."FIRST_NAME"::string))::string -> "group_concat_696") + projection ("test_space"."FIRST_NAME"::string -> "gr_expr_1", group_concat(("test_space"."FIRST_NAME"::string))::string -> "group_concat_1") group by ("test_space"."FIRST_NAME"::string) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" execution options: @@ -1690,11 +1680,10 @@ fn front_sql_group_concat_aggregate2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (group_concat(("group_concat_696"::string, ' '::string))::string -> "col_1", group_concat(distinct ("column_796"::string))::string -> "col_2") + projection (group_concat(("group_concat_1"::string, ' '::string))::string -> "col_1", group_concat(distinct ("gr_expr_1"::string))::string 
-> "col_2") motion [policy: full] - projection ("test_space"."FIRST_NAME"::string -> "column_796", group_concat(("test_space"."FIRST_NAME"::string, ' '::string))::string -> "group_concat_696") + projection ("test_space"."FIRST_NAME"::string -> "gr_expr_1", group_concat(("test_space"."FIRST_NAME"::string, ' '::string))::string -> "group_concat_1") group by ("test_space"."FIRST_NAME"::string) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" execution options: @@ -1709,9 +1698,9 @@ fn front_sql_string_agg_alias_to_group_concat() { let input = r#"SELECT string_agg("FIRST_NAME", ',') FROM "test_space""#; let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (group_concat(("group_concat_696"::string, ','::string))::string -> "col_1") + projection (group_concat(("group_concat_1"::string, ','::string))::string -> "col_1") motion [policy: full] - projection (group_concat(("test_space"."FIRST_NAME"::string, ','::string))::string -> "group_concat_696") + projection (group_concat(("test_space"."FIRST_NAME"::string, ','::string))::string -> "group_concat_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 @@ -1722,10 +1711,10 @@ fn front_sql_string_agg_alias_to_group_concat() { let input = r#"SELECT "id", string_agg("FIRST_NAME", ',') FROM "test_space" GROUP BY "id""#; let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "id", group_concat(("group_concat_1396"::string, ','::string))::string -> "col_1") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "group_concat_1396"::string -> "group_concat_1396") - motion [policy: segment([ref("column_596")])] - projection 
("test_space"."id"::unsigned -> "column_596", group_concat(("test_space"."FIRST_NAME"::string, ','::string))::string -> "group_concat_1396") + projection ("gr_expr_1"::unsigned -> "id", group_concat(("group_concat_1"::string, ','::string))::string -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "group_concat_1"::string -> "group_concat_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("test_space"."id"::unsigned -> "gr_expr_1", group_concat(("test_space"."FIRST_NAME"::string, ','::string))::string -> "group_concat_1") group by ("test_space"."id"::unsigned) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" execution options: @@ -1740,11 +1729,10 @@ fn front_sql_count_asterisk1() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("count_596"::unsigned))::unsigned -> "col_1", sum(("count_596"::unsigned))::unsigned -> "col_2") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1", sum(("count_1"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (count((*::integer))::unsigned -> "count_596") + projection (count((*::integer))::unsigned -> "count_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -1758,12 +1746,11 @@ fn front_sql_count_asterisk2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("count_1196"::unsigned))::unsigned -> "col_1", "column_596"::unsigned -> "b") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "count_1196"::unsigned -> "count_1196") - motion [policy: 
segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", count((*::integer))::unsigned -> "count_1196") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1", "gr_expr_1"::unsigned -> "b") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", count((*::integer))::unsigned -> "count_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1795,10 +1782,10 @@ fn front_sql_aggregates_with_subexpressions() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "b", sum(("count_1496"::unsigned))::unsigned -> "col_1", sum(("count_1796"::unsigned))::unsigned -> "col_2") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "count_1496"::unsigned -> "count_1496", "count_1796"::unsigned -> "count_1796") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", count(((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned)) + ROW(1::unsigned)))::unsigned -> "count_1496", count(("func"(("t"."a"::unsigned))::integer))::unsigned -> "count_1796") + projection ("gr_expr_1"::unsigned -> "b", sum(("count_1"::unsigned))::unsigned -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1", "count_2"::unsigned -> "count_2") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", count(((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned)) + ROW(1::unsigned)))::unsigned -> "count_1", count(("func"(("t"."a"::unsigned))::integer))::unsigned -> 
"count_2") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1814,12 +1801,11 @@ fn front_sql_aggregates_with_distinct1() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "b", count(distinct ("column_1296"::unsigned))::unsigned -> "col_1", count(distinct ("column_596"::unsigned))::unsigned -> "col_2") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_1296"::unsigned -> "column_1296") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", "t"."a"::unsigned -> "column_1296") + projection ("gr_expr_1"::unsigned -> "b", count(distinct ("gr_expr_2"::unsigned))::unsigned -> "col_1", count(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_2") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", "t"."a"::unsigned -> "gr_expr_2") group by ("t"."b"::unsigned, "t"."a"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1835,12 +1821,11 @@ fn front_sql_aggregates_with_distinct2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "b", sum(distinct ("column_1232"::decimal))::decimal -> "col_1") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_1232"::unsigned -> "column_1232") - motion [policy: 
segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) + ROW(3::unsigned) -> "column_1232") + projection ("gr_expr_1"::unsigned -> "b", sum(distinct ("gr_expr_2"::decimal))::decimal -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) + ROW(3::unsigned) -> "gr_expr_2") group by ("t"."b"::unsigned, (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) + ROW(3::unsigned)) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1855,11 +1840,10 @@ fn front_sql_aggregates_with_distinct3() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(distinct ("column_632"::decimal))::decimal -> "col_1") + projection (sum(distinct ("gr_expr_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection ((ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) + ROW(3::unsigned) -> "column_632") + projection ((ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) + ROW(3::unsigned) -> "gr_expr_1") group by ((ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) + ROW(3::unsigned)) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -1879,7 +1863,7 @@ fn front_sql_aggregate_inside_aggregate() { .unwrap_err(); assert_eq!( - "invalid query: aggregate function inside aggregate function is not allowed.", + "invalid query: aggregate functions inside aggregate function are not allowed.", err.to_string() ); } @@ -1973,13 +1957,12 @@ 
fn front_sql_pg_style_params3() { let plan = sql_to_optimized_ir(input, vec![Value::Unsigned(42)]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_1132"::unsigned -> "col_1") - having ROW(sum(("count_1896"::unsigned))::unsigned) > ROW(42::unsigned) - group by ("column_1132"::unsigned) output: ("column_1132"::unsigned -> "column_1132", "count_1896"::unsigned -> "count_1896") - motion [policy: segment([ref("column_1132")])] - projection (ROW("t"."a"::unsigned) + ROW(42::unsigned) -> "column_1132", count(("t"."b"::unsigned))::unsigned -> "count_1896") + projection ("gr_expr_1"::unsigned -> "col_1") + having ROW(sum(("count_1"::unsigned))::unsigned) > ROW(42::unsigned) + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW("t"."a"::unsigned) + ROW(42::unsigned) -> "gr_expr_1", count(("t"."b"::unsigned))::unsigned -> "count_1") group by (ROW("t"."a"::unsigned) + ROW(42::unsigned)) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") selection ROW("t"."a"::unsigned) = ROW(42::unsigned) scan "t" @@ -2157,9 +2140,9 @@ fn front_sql_aggregate_without_groupby() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_796"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned)) + ROW(1::unsigned)))::decimal -> "sum_796") + projection (sum(((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned)) + ROW(1::unsigned)))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2176,9 +2159,9 @@ fn front_sql_aggregate_without_groupby2() { 
insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("t1"."col_1"::unsigned -> "col_1") scan "t1" - projection (sum(("count_696"::unsigned))::unsigned -> "col_1") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1") motion [policy: full] - projection (count(("test_space"."id"::unsigned))::unsigned -> "count_696") + projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 @@ -2195,9 +2178,9 @@ fn front_sql_aggregate_on_aggregate() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (max(("t1"."c"::unsigned))::unsigned -> "col_1") scan "t1" - projection (sum(("count_696"::unsigned))::unsigned -> "c") + projection (sum(("count_1"::unsigned))::unsigned -> "c") motion [policy: full] - projection (count(("test_space"."id"::unsigned))::unsigned -> "count_696") + projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 @@ -2215,15 +2198,14 @@ fn front_sql_union_single_left() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all projection ("t"."a"::unsigned -> "a") scan "t" motion [policy: segment([ref("col_1")])] - projection (sum(("sum_1296"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1296") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2241,13 +2223,12 @@ fn front_sql_union_single_right() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all motion [policy: segment([ref("col_1")])] - projection (sum(("sum_696"::decimal))::decimal -> "col_1") + projection 
(sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_696") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" projection ("t"."a"::unsigned -> "a") scan "t" @@ -2267,18 +2248,17 @@ fn front_sql_union_single_both() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" union all motion [policy: segment([ref("col_1")])] - projection (sum(("sum_696"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_696") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" motion [policy: segment([ref("col_1")])] - projection (sum(("sum_1396"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1396") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2292,13 +2272,12 @@ fn front_sql_insert_single() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" insert "t" on conflict: fail motion [policy: segment([value(NULL), ref("col_2")])] - projection (sum(("sum_696"::decimal))::decimal -> "col_1", sum(("count_896"::unsigned))::unsigned -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (sum(("t"."b"::unsigned))::decimal -> "sum_696", count(("t"."d"::unsigned))::unsigned -> "count_896") + projection (sum(("t"."b"::unsigned))::decimal -> "sum_1", count(("t"."d"::unsigned))::unsigned -> "count_2") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2314,15 +2293,14 @@ fn 
front_sql_except_single_right() { "#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" except projection ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b") scan "t" motion [policy: segment([ref("col_1"), ref("col_2")])] - projection (sum(("sum_1396"::decimal))::decimal -> "col_1", sum(("count_1596"::unsigned))::unsigned -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (count(("t"."b"::unsigned))::unsigned -> "count_1596", sum(("t"."a"::unsigned))::decimal -> "sum_1396") + projection (count(("t"."b"::unsigned))::unsigned -> "count_2", sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2336,15 +2314,14 @@ fn front_sql_except_single_right() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" except projection ("t"."b"::unsigned -> "b", "t"."a"::unsigned -> "a") scan "t" motion [policy: segment([ref("col_2"), ref("col_1")])] - projection (sum(("sum_1396"::decimal))::decimal -> "col_1", sum(("count_1596"::unsigned))::unsigned -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (count(("t"."b"::unsigned))::unsigned -> "count_1596", sum(("t"."a"::unsigned))::decimal -> "sum_1396") + projection (count(("t"."b"::unsigned))::unsigned -> "count_2", sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2360,13 +2337,12 @@ fn front_sql_except_single_left() { "#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" except motion [policy: segment([ref("col_1"), 
ref("col_2")])] - projection (sum(("sum_696"::decimal))::decimal -> "col_1", sum(("count_896"::unsigned))::unsigned -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (count(("t"."b"::unsigned))::unsigned -> "count_896", sum(("t"."a"::unsigned))::decimal -> "sum_696") + projection (count(("t"."b"::unsigned))::unsigned -> "count_2", sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" projection ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b") scan "t" @@ -2387,14 +2363,14 @@ fn front_sql_except_single_both() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" except motion [policy: segment([ref("col_1")])] - projection (sum(("sum_696"::decimal))::decimal -> "col_1", sum(("count_896"::unsigned))::unsigned -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection (count(("t"."b"::unsigned))::unsigned -> "count_896", sum(("t"."a"::unsigned))::decimal -> "sum_696") + projection (count(("t"."b"::unsigned))::unsigned -> "count_2", sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" motion [policy: segment([ref("col_1")])] - projection (sum(("sum_1596"::decimal))::decimal -> "col_1", sum(("sum_1796"::decimal))::decimal -> "col_2") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("sum_2"::decimal))::decimal -> "col_2") motion [policy: full] - projection (sum(("t"."b"::unsigned))::decimal -> "sum_1796", sum(("t"."a"::unsigned))::decimal -> "sum_1596") + projection (sum(("t"."b"::unsigned))::decimal -> "sum_2", sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2409,12 +2385,11 @@ fn front_sql_groupby_expression() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection 
("column_532"::unsigned -> "col_1") - group by ("column_532"::unsigned) output: ("column_532"::unsigned -> "column_532") - motion [policy: segment([ref("column_532")])] - projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "column_532") + projection ("gr_expr_1"::unsigned -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "gr_expr_1") group by (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2430,12 +2405,11 @@ fn front_sql_groupby_expression2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_532"::unsigned + ROW(sum(("count_1596"::unsigned))::unsigned) -> "col_1") - group by ("column_532"::unsigned) output: ("column_532"::unsigned -> "column_532", "count_1596"::unsigned -> "count_1596") - motion [policy: segment([ref("column_532")])] - projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "column_532", count(("t"."a"::unsigned))::unsigned -> "count_1596") + projection ("gr_expr_1"::unsigned + ROW(sum(("count_1"::unsigned))::unsigned) -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "gr_expr_1", count(("t"."a"::unsigned))::unsigned -> "count_1") group by (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2451,12 
+2425,11 @@ fn front_sql_groupby_expression3() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_532"::unsigned -> "col_1", ("column_732"::unsigned * ROW(sum(("sum_2496"::decimal))::decimal)) / ROW(sum(("count_2596"::unsigned))::unsigned) -> "col_2") - group by ("column_532"::unsigned, "column_732"::unsigned) output: ("column_532"::unsigned -> "column_532", "column_732"::unsigned -> "column_732", "count_2596"::unsigned -> "count_2596", "sum_2496"::decimal -> "sum_2496") - motion [policy: segment([ref("column_532"), ref("column_732")])] - projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "column_532", ROW("t"."c"::unsigned) * ROW("t"."d"::unsigned) -> "column_732", count((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned)))::unsigned -> "count_2596", sum((ROW("t"."c"::unsigned) * ROW("t"."d"::unsigned)))::decimal -> "sum_2496") + projection ("gr_expr_1"::unsigned -> "col_1", ("gr_expr_2"::unsigned * ROW(sum(("sum_1"::decimal))::decimal)) / ROW(sum(("count_2"::unsigned))::unsigned) -> "col_2") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "count_2"::unsigned -> "count_2", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "gr_expr_1", ROW("t"."c"::unsigned) * ROW("t"."d"::unsigned) -> "gr_expr_2", count((ROW("t"."a"::unsigned) * ROW("t"."b"::unsigned)))::unsigned -> "count_2", sum((ROW("t"."c"::unsigned) * ROW("t"."d"::unsigned)))::decimal -> "sum_1") group by (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned), ROW("t"."c"::unsigned) * ROW("t"."d"::unsigned)) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution 
options: @@ -2472,12 +2445,11 @@ fn front_sql_groupby_expression4() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_532"::unsigned -> "col_1", "column_796"::unsigned -> "a") - group by ("column_532"::unsigned, "column_796"::unsigned) output: ("column_532"::unsigned -> "column_532", "column_796"::unsigned -> "column_796") - motion [policy: segment([ref("column_532"), ref("column_796")])] - projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "column_532", "t"."a"::unsigned -> "column_796") + projection ("gr_expr_1"::unsigned -> "col_1", "gr_expr_2"::unsigned -> "a") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "gr_expr_1", "t"."a"::unsigned -> "gr_expr_2") group by (ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned), "t"."a"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2498,18 +2470,18 @@ fn front_sql_groupby_with_aggregates() { projection ("t1"."a"::unsigned -> "a", "t1"."b"::unsigned -> "b", "t1"."c"::decimal -> "c", "t2"."g"::unsigned -> "g", "t2"."e"::unsigned -> "e", "t2"."f"::decimal -> "f") join on ROW("t1"."a"::unsigned, "t1"."b"::unsigned) = ROW("t2"."e"::unsigned, "t2"."g"::unsigned) scan "t1" - projection ("column_596"::unsigned -> "a", "column_696"::unsigned -> "b", sum(("sum_1596"::decimal))::decimal -> "c") - group by ("column_596"::unsigned, "column_696"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_696"::unsigned -> "column_696", "sum_1596"::decimal -> "sum_1596") - motion [policy: segment([ref("column_596"), 
ref("column_696")])] - projection ("t"."a"::unsigned -> "column_596", "t"."b"::unsigned -> "column_696", sum(("t"."c"::unsigned))::decimal -> "sum_1596") + projection ("gr_expr_1"::unsigned -> "a", "gr_expr_2"::unsigned -> "b", sum(("sum_1"::decimal))::decimal -> "c") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("t"."a"::unsigned -> "gr_expr_1", "t"."b"::unsigned -> "gr_expr_2", sum(("t"."c"::unsigned))::decimal -> "sum_1") group by ("t"."a"::unsigned, "t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" motion [policy: segment([ref("e"), ref("g")])] scan "t2" - projection ("column_2496"::unsigned -> "g", "column_2596"::unsigned -> "e", sum(("sum_3496"::decimal))::decimal -> "f") - group by ("column_2496"::unsigned, "column_2596"::unsigned) output: ("column_2496"::unsigned -> "column_2496", "column_2596"::unsigned -> "column_2596", "sum_3496"::decimal -> "sum_3496") - motion [policy: segment([ref("column_2496"), ref("column_2596")])] - projection ("t2"."g"::unsigned -> "column_2496", "t2"."e"::unsigned -> "column_2596", sum(("t2"."f"::unsigned))::decimal -> "sum_3496") + projection ("gr_expr_1"::unsigned -> "g", "gr_expr_2"::unsigned -> "e", sum(("sum_1"::decimal))::decimal -> "f") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("t2"."g"::unsigned -> "gr_expr_1", "t2"."e"::unsigned -> "gr_expr_2", sum(("t2"."f"::unsigned))::decimal -> "sum_1") group by ("t2"."g"::unsigned, "t2"."e"::unsigned) output: ("t2"."e"::unsigned -> "e", 
"t2"."f"::unsigned -> "f", "t2"."g"::unsigned -> "g", "t2"."h"::unsigned -> "h", "t2"."bucket_id"::unsigned -> "bucket_id") scan "t2" execution options: @@ -2553,15 +2525,14 @@ fn front_sql_left_join_single_left() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("t1"."a"::decimal -> "a", "t2"."b"::unsigned -> "b") left join on ROW("t1"."a"::decimal) = ROW("t2"."b"::unsigned) motion [policy: segment([ref("a")])] scan "t1" - projection (ROW(sum(("sum_696"::decimal))::decimal) / ROW(3::unsigned) -> "a") + projection (ROW(sum(("sum_1"::decimal))::decimal) / ROW(3::unsigned) -> "a") motion [policy: full] - projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_696") + projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_1") scan "test_space" motion [policy: full] scan "t2" @@ -2583,16 +2554,15 @@ fn front_sql_left_join_single_left2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); // full motion should be under outer child insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("t1"."a"::decimal -> "a", "t2"."b"::unsigned -> "b") left join on (ROW("t1"."a"::decimal) + ROW(3::unsigned)) <> ROW("t2"."b"::unsigned) motion [policy: segment([ref("a")])] scan "t1" - projection (ROW(sum(("sum_696"::decimal))::decimal) / ROW(3::unsigned) -> "a") + projection (ROW(sum(("sum_1"::decimal))::decimal) / ROW(3::unsigned) -> "a") motion [policy: full] - projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_696") + projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_1") scan "test_space" motion [policy: full] scan "t2" @@ -2614,20 +2584,19 @@ fn front_sql_left_join_single_both() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); // full motion should be under outer child insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" 
projection ("t1"."a"::decimal -> "a", "t2"."b"::unsigned -> "b") left join on ROW("t1"."a"::decimal) <> ROW("t2"."b"::unsigned) scan "t1" - projection (ROW(sum(("sum_696"::decimal))::decimal) / ROW(3::unsigned) -> "a") + projection (ROW(sum(("sum_1"::decimal))::decimal) / ROW(3::unsigned) -> "a") motion [policy: full] - projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_696") + projection (sum(("test_space"."id"::unsigned))::decimal -> "sum_1") scan "test_space" scan "t2" - projection (sum(("count_1496"::unsigned))::unsigned -> "b") + projection (sum(("count_1"::unsigned))::unsigned -> "b") motion [policy: full] - projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1496") + projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 @@ -2672,13 +2641,14 @@ fn front_sql_having1() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); + println!("Formatted arena: {}", plan.formatted_arena().unwrap()); + insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "a", sum(("sum_2196"::decimal))::decimal -> "col_1") - having (ROW("column_596"::unsigned) > ROW(1::unsigned)) and (ROW(sum(distinct ("column_1296"::decimal))::decimal) > ROW(1::unsigned)) - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_1296"::unsigned -> "column_1296", "sum_2196"::decimal -> "sum_2196") - motion [policy: segment([ref("column_596")])] - projection ("t"."a"::unsigned -> "column_596", "t"."b"::unsigned -> "column_1296", sum(("t"."b"::unsigned))::decimal -> "sum_2196") + projection ("gr_expr_1"::unsigned -> "a", sum(("sum_1"::decimal))::decimal -> "col_1") + having (ROW("gr_expr_1"::unsigned) > ROW(1::unsigned)) and (ROW(sum(distinct ("gr_expr_2"::decimal))::decimal) > ROW(1::unsigned)) + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", 
"gr_expr_2"::unsigned -> "gr_expr_2", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."a"::unsigned -> "gr_expr_1", "t"."b"::unsigned -> "gr_expr_2", sum(("t"."b"::unsigned))::decimal -> "sum_1") group by ("t"."a"::unsigned, "t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2695,12 +2665,11 @@ fn front_sql_having2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (ROW(sum(("sum_1696"::decimal))::decimal) * ROW(count(distinct ("column_1596"::unsigned))::unsigned) -> "col_1", sum(("sum_1696"::decimal))::decimal -> "col_2") - having (ROW(sum(distinct ("column_1596"::decimal))::decimal) > ROW(1::unsigned)) and (ROW(sum(("sum_1696"::decimal))::decimal) > ROW(1::unsigned)) + projection (ROW(sum(("sum_1"::decimal))::decimal) * ROW(count(distinct ("gr_expr_1"::unsigned))::unsigned) -> "col_1", sum(("sum_1"::decimal))::decimal -> "col_2") + having (ROW(sum(distinct ("gr_expr_1"::decimal))::decimal) > ROW(1::unsigned)) and (ROW(sum(("sum_1"::decimal))::decimal) > ROW(1::unsigned)) motion [policy: full] - projection ("t"."b"::unsigned -> "column_1596", sum(("t"."a"::unsigned))::decimal -> "sum_1696") + projection ("t"."b"::unsigned -> "gr_expr_1", sum(("t"."a"::unsigned))::decimal -> "sum_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2717,12 +2686,11 @@ fn front_sql_having3() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_1396"::decimal))::decimal -> "col_1") - 
having ROW(sum(("sum_1396"::decimal))::decimal) > ROW(1::unsigned) + projection (sum(("sum_1"::decimal))::decimal -> "col_1") + having ROW(sum(("sum_1"::decimal))::decimal) > ROW(1::unsigned) motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1396") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2755,13 +2723,12 @@ fn front_sql_having_with_sq() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "sysFrom", sum(distinct ("column_3396"::decimal))::decimal -> "sum", count(distinct ("column_3396"::unsigned))::unsigned -> "count") - having ROW($0) > ROW(count(distinct ("column_3396"::unsigned))::unsigned) - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_3396"::unsigned -> "column_3396") - motion [policy: segment([ref("column_596")])] - projection ("test_space"."sysFrom"::unsigned -> "column_596", "test_space"."id"::unsigned -> "column_3396") + projection ("gr_expr_1"::unsigned -> "sysFrom", sum(distinct ("gr_expr_2"::decimal))::decimal -> "sum", count(distinct ("gr_expr_2"::unsigned))::unsigned -> "count") + having ROW($0) > ROW(count(distinct ("gr_expr_2"::unsigned))::unsigned) + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1")])] + projection ("test_space"."sysFrom"::unsigned -> "gr_expr_1", "test_space"."id"::unsigned -> "gr_expr_2") group by ("test_space"."sysFrom"::unsigned, "test_space"."id"::unsigned) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" subquery $0: @@ 
-2803,13 +2770,12 @@ fn front_sql_having_with_sq_segment_motion() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "sysFrom", "column_696"::unsigned -> "sys_op", sum(distinct ("column_3296"::decimal))::decimal -> "sum", count(distinct ("column_3296"::unsigned))::unsigned -> "count") - having ROW("column_596"::unsigned, "column_696"::unsigned) in ROW($0, $0) - group by ("column_596"::unsigned, "column_696"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_696"::unsigned -> "column_696", "column_3296"::unsigned -> "column_3296") - motion [policy: segment([ref("column_596"), ref("column_696")])] - projection ("test_space"."sysFrom"::unsigned -> "column_596", "test_space"."sys_op"::unsigned -> "column_696", "test_space"."id"::unsigned -> "column_3296") + projection ("gr_expr_1"::unsigned -> "sysFrom", "gr_expr_2"::unsigned -> "sys_op", sum(distinct ("gr_expr_3"::decimal))::decimal -> "sum", count(distinct ("gr_expr_3"::unsigned))::unsigned -> "count") + having ROW("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) in ROW($0, $0) + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "gr_expr_3"::unsigned -> "gr_expr_3") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("test_space"."sysFrom"::unsigned -> "gr_expr_1", "test_space"."sys_op"::unsigned -> "gr_expr_2", "test_space"."id"::unsigned -> "gr_expr_3") group by ("test_space"."sysFrom"::unsigned, "test_space"."sys_op"::unsigned, "test_space"."id"::unsigned) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" subquery $0: @@ -2835,13 +2801,12 @@ 
fn front_sql_having_with_sq_segment_local_motion() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_596"::unsigned -> "sysFrom", "column_696"::unsigned -> "sys_op", sum(distinct ("column_3296"::decimal))::decimal -> "sum", count(distinct ("column_3296"::unsigned))::unsigned -> "count") - having ROW("column_596"::unsigned, "column_696"::unsigned) in ROW($0, $0) - group by ("column_596"::unsigned, "column_696"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_696"::unsigned -> "column_696", "column_3296"::unsigned -> "column_3296") - motion [policy: segment([ref("column_596"), ref("column_696")])] - projection ("test_space"."sysFrom"::unsigned -> "column_596", "test_space"."sys_op"::unsigned -> "column_696", "test_space"."id"::unsigned -> "column_3296") + projection ("gr_expr_1"::unsigned -> "sysFrom", "gr_expr_2"::unsigned -> "sys_op", sum(distinct ("gr_expr_3"::decimal))::decimal -> "sum", count(distinct ("gr_expr_3"::unsigned))::unsigned -> "count") + having ROW("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) in ROW($0, $0) + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "gr_expr_3"::unsigned -> "gr_expr_3") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("test_space"."sysFrom"::unsigned -> "gr_expr_1", "test_space"."sys_op"::unsigned -> "gr_expr_2", "test_space"."id"::unsigned -> "gr_expr_3") group by ("test_space"."sysFrom"::unsigned, "test_space"."sys_op"::unsigned, "test_space"."id"::unsigned) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" subquery $0: @@ -2861,12 +2826,11 @@ fn 
front_sql_unique_local_aggregates() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); // here we must compute only two aggregates at local stage: sum(a), count(a) insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_696"::decimal))::decimal -> "col_1", sum(("count_896"::unsigned))::unsigned -> "col_2", ROW(sum(("sum_696"::decimal))::decimal) + ROW(sum(("count_896"::unsigned))::unsigned) -> "col_3") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", sum(("count_2"::unsigned))::unsigned -> "col_2", ROW(sum(("sum_1"::decimal))::decimal) + ROW(sum(("count_2"::unsigned))::unsigned) -> "col_3") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_696", count(("t"."a"::unsigned))::unsigned -> "count_896") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1", count(("t"."a"::unsigned))::unsigned -> "count_2") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -2883,13 +2847,12 @@ fn front_sql_unique_local_groupings() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); // here we must compute only two groupby columns at local stage: a, b insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(distinct ("column_1196"::decimal))::decimal -> "col_1", count(distinct ("column_1196"::unsigned))::unsigned -> "col_2", count(distinct ("column_596"::unsigned))::unsigned -> "col_3") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "column_1196"::unsigned -> "column_1196") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", "t"."a"::unsigned -> "column_1196") + projection (sum(distinct ("gr_expr_2"::decimal))::decimal -> "col_1", count(distinct ("gr_expr_2"::unsigned))::unsigned -> "col_2", count(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_3") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> 
"gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", "t"."a"::unsigned -> "gr_expr_2") group by ("t"."b"::unsigned, "t"."a"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2942,13 +2905,12 @@ fn front_sql_select_distinct() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); // here we must compute only two groupby columns at local stage: a, b insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_896"::unsigned -> "a", "column_832"::unsigned -> "col_1") - group by ("column_896"::unsigned, "column_832"::unsigned) output: ("column_896"::unsigned -> "column_896", "column_832"::unsigned -> "column_832") - motion [policy: segment([ref("column_896"), ref("column_832")])] - projection ("t"."a"::unsigned -> "column_896", ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "column_832") + projection ("gr_expr_1"::unsigned -> "a", "gr_expr_2"::unsigned -> "col_1") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2")])] + projection ("t"."a"::unsigned -> "gr_expr_1", ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned) -> "gr_expr_2") group by ("t"."a"::unsigned, ROW("t"."a"::unsigned) + ROW("t"."b"::unsigned)) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -2963,12 +2925,11 @@ fn front_sql_select_distinct_asterisk() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection 
("column_996"::unsigned -> "a", "column_1096"::unsigned -> "b", "column_1196"::unsigned -> "c", "column_1296"::unsigned -> "d") - group by ("column_996"::unsigned, "column_1096"::unsigned, "column_1196"::unsigned, "column_1296"::unsigned) output: ("column_996"::unsigned -> "column_996", "column_1096"::unsigned -> "column_1096", "column_1196"::unsigned -> "column_1196", "column_1296"::unsigned -> "column_1296") - motion [policy: segment([ref("column_996"), ref("column_1096"), ref("column_1196"), ref("column_1296")])] - projection ("t"."a"::unsigned -> "column_996", "t"."b"::unsigned -> "column_1096", "t"."c"::unsigned -> "column_1196", "t"."d"::unsigned -> "column_1296") + projection ("gr_expr_1"::unsigned -> "a", "gr_expr_2"::unsigned -> "b", "gr_expr_3"::unsigned -> "c", "gr_expr_4"::unsigned -> "d") + group by ("gr_expr_1"::unsigned, "gr_expr_2"::unsigned, "gr_expr_3"::unsigned, "gr_expr_4"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "gr_expr_2"::unsigned -> "gr_expr_2", "gr_expr_3"::unsigned -> "gr_expr_3", "gr_expr_4"::unsigned -> "gr_expr_4") + motion [policy: segment([ref("gr_expr_1"), ref("gr_expr_2"), ref("gr_expr_3"), ref("gr_expr_4")])] + projection ("t"."a"::unsigned -> "gr_expr_1", "t"."b"::unsigned -> "gr_expr_2", "t"."c"::unsigned -> "gr_expr_3", "t"."d"::unsigned -> "gr_expr_4") group by ("t"."a"::unsigned, "t"."b"::unsigned, "t"."c"::unsigned, "t"."d"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -3000,12 +2961,11 @@ fn front_sql_select_distinct_with_aggr() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_1296"::decimal))::decimal -> "col_1", "column_596"::unsigned -> "b") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", 
"sum_1296"::decimal -> "sum_1296") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", sum(("t"."a"::unsigned))::decimal -> "sum_1296") + projection (sum(("sum_1"::decimal))::decimal -> "col_1", "gr_expr_1"::unsigned -> "b") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", sum(("t"."a"::unsigned))::decimal -> "sum_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -3020,11 +2980,10 @@ fn front_sql_select_distinct_with_aggr2() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("sum_696"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_696") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" execution options: sql_vdbe_opcode_max = 45000 @@ -3337,7 +3296,6 @@ fn front_sql_update6() { let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" update "t3" "b" = "col_0" @@ -3348,9 +3306,9 @@ fn front_sql_update6() { subquery $0: motion [policy: full] scan - projection (sum(("sum_796"::decimal))::decimal -> "s") + projection (sum(("sum_1"::decimal))::decimal -> "s") motion [policy: full] - projection (sum(("t3"."b"::integer))::decimal -> "sum_796") + projection (sum(("t3"."b"::integer))::decimal -> "sum_1") scan "t3" execution options: sql_vdbe_opcode_max = 45000 @@ -3847,12 +3805,11 @@ fn front_subqueries_interpreted_as_expression_under_group_by() { let input = 
r#"SELECT COUNT(*) FROM "test_space" GROUP BY "id" + (VALUES (1))"#; let plan = sql_to_optimized_ir(input, vec![]); - println!("{}", plan.as_explain().unwrap()); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("count_1496"::unsigned))::unsigned -> "col_1") - group by ("column_932"::unsigned) output: ("column_932"::unsigned -> "column_932", "count_1496"::unsigned -> "count_1496") - motion [policy: segment([ref("column_932")])] - projection (ROW("test_space"."id"::unsigned) + ROW($1) -> "column_932", count((*::integer))::unsigned -> "count_1496") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW("test_space"."id"::unsigned) + ROW($1) -> "gr_expr_1", count((*::integer))::unsigned -> "count_1") group by (ROW("test_space"."id"::unsigned) + ROW($0)) output: ("test_space"."id"::unsigned -> "id", "test_space"."sysFrom"::unsigned -> "sysFrom", "test_space"."FIRST_NAME"::string -> "FIRST_NAME", "test_space"."sys_op"::unsigned -> "sys_op", "test_space"."bucket_id"::unsigned -> "bucket_id") scan "test_space" subquery $0: @@ -3892,9 +3849,9 @@ fn front_select_without_scan_2() { subquery $0: motion [policy: full] scan - projection (sum(("count_796"::unsigned))::unsigned -> "col_1") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1") motion [policy: full] - projection (count((*::integer))::unsigned -> "count_796") + projection (count((*::integer))::unsigned -> "count_1") scan "t2" subquery $1: scan @@ -4023,9 +3980,6 @@ fn front_sql_whitespaces_are_not_ignored() { for query in correct_queries { let res = ParseTree::parse(Rule::Command, query); - if res.is_err() { - println!("Query [{query}] is invalid.") - } assert!(res.is_ok()); } @@ -4036,9 +3990,6 @@ fn front_sql_whitespaces_are_not_ignored() { fixed.push_str(&query[..wp_idx]); 
fixed.push_str(&query[wp_idx + 1..]); let res = ParseTree::parse(Rule::Command, &fixed); - if res.is_ok() { - println!("Query [{fixed}] is valid.") - } assert!(res.is_err()) } } diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/coalesce.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/coalesce.rs index b2fd230860e7e24ef39ba3b915beb0f27ee72465..96d404056cbabddd94b602ebc532b02b1571c153 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/coalesce.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/coalesce.rs @@ -1,5 +1,4 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; -use pretty_assertions::assert_eq; #[test] fn coalesce_in_projection() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/funcs.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/funcs.rs index 9f0cc48dc5b23b5e0c58fd1501ed212957093aab..f229685e21100f2d6ed0f49bb6ee2dd91578b545 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/funcs.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/funcs.rs @@ -1,5 +1,4 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; -use pretty_assertions::assert_eq; #[test] fn lower_upper() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/global.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/global.rs index 7b8ecb9e3a07067746c91196c346069a304c2e89..fcf39c2386cc01a06b873af71ccbc0dd96f54906 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/global.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/global.rs @@ -86,9 +86,9 @@ fn front_sql_global_tbl_sq1() { scan "global_t" subquery $0: scan - projection (sum(("sum_1596"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1596") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" subquery $1: motion [policy: full] @@ -120,9 +120,9 @@ fn front_sql_global_tbl_multiple_sqs1() { scan "global_t" subquery $0: scan - 
projection (sum(("sum_1796"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1796") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" subquery $1: scan @@ -155,9 +155,9 @@ fn front_sql_global_tbl_multiple_sqs2() { scan "global_t" subquery $0: scan - projection (sum(("sum_1796"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t"."a"::unsigned))::decimal -> "sum_1796") + projection (sum(("t"."a"::unsigned))::decimal -> "sum_1") scan "t" subquery $1: motion [policy: full] @@ -469,9 +469,9 @@ fn front_sql_global_join4() { projection ("s"."e"::decimal -> "e") left join on true::boolean scan "s" - projection (sum(("sum_696"::decimal))::decimal -> "e") + projection (sum(("sum_1"::decimal))::decimal -> "e") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_696") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" scan "global_t" projection ("global_t"."a"::integer -> "a", "global_t"."b"::integer -> "b") @@ -500,9 +500,9 @@ fn front_sql_global_join5() { projection ("global_t"."a"::integer -> "a", "global_t"."b"::integer -> "b") scan "global_t" scan "s" - projection (sum(("sum_896"::decimal))::decimal -> "e") + projection (sum(("sum_1"::decimal))::decimal -> "e") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_896") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" execution options: sql_vdbe_opcode_max = 45000 @@ -761,11 +761,11 @@ fn front_sql_global_aggregate5() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_1432"::integer -> "col_1", sum(("sum_2896"::decimal))::decimal -> "col_2") - having ROW(sum(("sum_2296"::decimal::double))::decimal / 
sum(("count_2296"::decimal::double))::decimal) > ROW(3::unsigned) - group by ("column_1432"::integer) output: ("column_1432"::integer -> "column_1432", "sum_2296"::decimal -> "sum_2296", "count_2296"::unsigned -> "count_2296", "sum_2896"::decimal -> "sum_2896") - motion [policy: segment([ref("column_1432")])] - projection (ROW("global_t"."b"::integer) + ROW("global_t"."a"::integer) -> "column_1432", sum(("global_t"."b"::integer))::decimal -> "sum_2296", count(("global_t"."b"::integer))::unsigned -> "count_2296", sum(("global_t"."a"::integer))::decimal -> "sum_2896") + projection ("gr_expr_1"::integer -> "col_1", sum(("sum_1"::decimal))::decimal -> "col_2") + having ROW(sum(("avg_2"::decimal::double))::decimal / sum(("avg_3"::decimal::double))::decimal) > ROW(3::unsigned) + group by ("gr_expr_1"::integer) output: ("gr_expr_1"::integer -> "gr_expr_1", "avg_2"::decimal -> "avg_2", "avg_3"::unsigned -> "avg_3", "sum_1"::decimal -> "sum_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW("global_t"."b"::integer) + ROW("global_t"."a"::integer) -> "gr_expr_1", sum(("global_t"."b"::integer))::decimal -> "avg_2", count(("global_t"."b"::integer))::unsigned -> "avg_3", sum(("global_t"."a"::integer))::decimal -> "sum_1") group by (ROW("global_t"."b"::integer) + ROW("global_t"."a"::integer)) output: ("global_t"."a"::integer -> "a", "global_t"."b"::integer -> "b") selection ROW("global_t"."a"::integer, "global_t"."b"::integer) in ROW($0, $0) scan "global_t" @@ -992,9 +992,9 @@ fn front_sql_global_union_all3() { projection ("global_t"."a"::integer -> "a") scan "global_t" motion [policy: segment([ref("col_1")])] - projection (sum(("sum_996"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_996") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" motion [policy: local] projection ("global_t"."b"::integer -> "b") @@ -1093,9 
+1093,9 @@ fn front_sql_global_union2() { projection ("global_t"."a"::integer -> "a") scan "global_t" motion [policy: segment([ref("col_1")])] - projection (sum(("sum_996"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_996") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" execution options: sql_vdbe_opcode_max = 45000 @@ -1232,9 +1232,9 @@ fn check_plan_except_global_vs_single() { except projection ("global_t"."a"::integer -> "a") scan "global_t" - projection (sum(("sum_996"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_996") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" execution options: sql_vdbe_opcode_max = 45000 @@ -1254,9 +1254,9 @@ fn check_plan_except_single_vs_global() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" except - projection (sum(("sum_696"::decimal))::decimal -> "col_1") + projection (sum(("sum_1"::decimal))::decimal -> "col_1") motion [policy: full] - projection (sum(("t2"."e"::unsigned))::decimal -> "sum_696") + projection (sum(("t2"."e"::unsigned))::decimal -> "sum_1") scan "t2" projection ("global_t"."a"::integer -> "a") scan "global_t" diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/insert.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/insert.rs index 03066c1aa27dc844261351740e1226b81206d399..e6191f9bce45e9b080cf0e0d7f1bcbbfc04fed55 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/insert.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/insert.rs @@ -1,6 +1,5 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; use crate::ir::value::Value; -use pretty_assertions::assert_eq; #[test] fn insert1() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/join.rs 
b/sbroad/sbroad-core/src/frontend/sql/ir/tests/join.rs index 123ca2d17211342b2f2661dc6d4ae7b1aedc0441..020c0aa9a392c521a3d4abce35cc64b9807b6c90 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/join.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/join.rs @@ -1,5 +1,4 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; -use pretty_assertions::assert_eq; #[test] fn milti_join1() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/like.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/like.rs index 12b61c11de4032a5249474279a324f3156cc6084..19b0f36f27b62d8d19bc1b1ac7667621adbd1aa8 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/like.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/like.rs @@ -90,10 +90,10 @@ fn like_explain3() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_332"::boolean -> "col_1") - group by ("column_332"::boolean) output: ("column_332"::boolean -> "column_332") - motion [policy: segment([ref("column_332")])] - projection (ROW("t1"."a"::string) LIKE ROW("t1"."a"::string) ESCAPE ROW('\'::string) -> "column_332") + projection ("gr_expr_1"::boolean -> "col_1") + group by ("gr_expr_1"::boolean) output: ("gr_expr_1"::boolean -> "gr_expr_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW("t1"."a"::string) LIKE ROW("t1"."a"::string) ESCAPE ROW('\'::string) -> "gr_expr_1") group by (ROW("t1"."a"::string) LIKE ROW("t1"."a"::string) ESCAPE ROW('\'::string)) output: ("t1"."a"::string -> "a", "t1"."bucket_id"::unsigned -> "bucket_id", "t1"."b"::integer -> "b") scan "t1" execution options: @@ -140,10 +140,10 @@ fn ilike_explain() { let plan = sql_to_optimized_ir(input, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection ("column_332"::boolean -> "col_1") - group by ("column_332"::boolean) output: ("column_332"::boolean -> "column_332") - motion [policy: segment([ref("column_332")])] - 
projection (ROW(lower(("t1"."a"::string))::string) LIKE ROW(lower(("t1"."a"::string))::string) ESCAPE ROW('x'::string) -> "column_332") + projection ("gr_expr_1"::boolean -> "col_1") + group by ("gr_expr_1"::boolean) output: ("gr_expr_1"::boolean -> "gr_expr_1") + motion [policy: segment([ref("gr_expr_1")])] + projection (ROW(lower(("t1"."a"::string))::string) LIKE ROW(lower(("t1"."a"::string))::string) ESCAPE ROW('x'::string) -> "gr_expr_1") group by (ROW(lower(("t1"."a"::string))::string) LIKE ROW(lower(("t1"."a"::string))::string) ESCAPE ROW('x'::string)) output: ("t1"."a"::string -> "a", "t1"."bucket_id"::unsigned -> "bucket_id", "t1"."b"::integer -> "b") scan "t1" execution options: diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs index b5a5cba46fe1f6ea875adca47039223119ccccb6..7a34cf10ca8fe047fbf17d188e74f0fe6fb52a1a 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/limit.rs @@ -1,5 +1,4 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; -use pretty_assertions::assert_eq; #[test] fn select() { @@ -51,9 +50,9 @@ fn aggregate() { insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" limit 1 - projection (min(("min_696"::unsigned))::unsigned -> "col_1", min(distinct ("column_796"::unsigned))::unsigned -> "col_2") + projection (min(("min_1"::unsigned))::unsigned -> "col_1", min(distinct ("gr_expr_1"::unsigned))::unsigned -> "col_2") motion [policy: full] - projection ("t"."b"::unsigned -> "column_796", min(("t"."b"::unsigned))::unsigned -> "min_696") + projection ("t"."b"::unsigned -> "gr_expr_1", min(("t"."b"::unsigned))::unsigned -> "min_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: @@ -72,10 +71,10 @@ fn group_by() { limit 555 motion 
[policy: full] limit 555 - projection (sum(("count_1196"::unsigned))::unsigned -> "col_1", "column_596"::unsigned -> "b") - group by ("column_596"::unsigned) output: ("column_596"::unsigned -> "column_596", "count_1196"::unsigned -> "count_1196") - motion [policy: segment([ref("column_596")])] - projection ("t"."b"::unsigned -> "column_596", count((*::integer))::unsigned -> "count_1196") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1", "gr_expr_1"::unsigned -> "b") + group by ("gr_expr_1"::unsigned) output: ("gr_expr_1"::unsigned -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("t"."b"::unsigned -> "gr_expr_1", count((*::integer))::unsigned -> "count_1") group by ("t"."b"::unsigned) output: ("t"."a"::unsigned -> "a", "t"."b"::unsigned -> "b", "t"."c"::unsigned -> "c", "t"."d"::unsigned -> "d", "t"."bucket_id"::unsigned -> "bucket_id") scan "t" execution options: diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/trim.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/trim.rs index 59f78d038498570f4b2d855259c7a25d3c095d0e..d7033ccf069007f8d26c5a92b61ddd4b932347e0 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/trim.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/trim.rs @@ -1,5 +1,4 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; -use pretty_assertions::assert_eq; #[test] fn trim() { diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/union.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests/union.rs index 19f7a853dd31493e184c485dcfdeaf8621f244e0..8c6fd8ce03e60ae504b6260550d1633cf222129a 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/union.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/union.rs @@ -1,5 +1,3 @@ -use pretty_assertions::assert_eq; - use crate::ir::transformation::helpers::sql_to_optimized_ir; #[test] diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests/update.rs 
b/sbroad/sbroad-core/src/frontend/sql/ir/tests/update.rs index fba50342213eafab75ca5e024fa92875ad7d1f0e..4d27d91d4f3ea2e1a85c6e9dd75c97db0aed1318 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests/update.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests/update.rs @@ -1,6 +1,5 @@ use crate::ir::transformation::helpers::sql_to_optimized_ir; use crate::ir::value::Value; -use pretty_assertions::assert_eq; #[test] fn update1() { diff --git a/sbroad/sbroad-core/src/ir.rs b/sbroad/sbroad-core/src/ir.rs index 79035eb8d5b9849b1c4ba2ee70d5717d5ac86cf1..753d8e4362d35cf744a8ce404a0a5bf34af6a02d 100644 --- a/sbroad/sbroad-core/src/ir.rs +++ b/sbroad/sbroad-core/src/ir.rs @@ -1028,6 +1028,7 @@ impl Plan { Relational::Selection(Selection { output, .. }) | Relational::Having(Having { output, .. }) | Relational::OrderBy(OrderBy { output, .. }) + | Relational::GroupBy(GroupBy { output, .. }) | Relational::Limit(Limit { output, .. }) => { let source_output_list = self.get_row_list(*output)?; let source_ref_id = source_output_list[*position]; @@ -1038,7 +1039,6 @@ impl Plan { | Relational::Projection { .. } | Relational::SelectWithoutScan { .. } | Relational::ScanCte { .. } - | Relational::GroupBy { .. } | Relational::Motion { .. } | Relational::ScanSubQuery { .. } | Relational::Join { .. 
} diff --git a/sbroad/sbroad-core/src/ir/aggregates.rs b/sbroad/sbroad-core/src/ir/aggregates.rs index 924e5dd0d3f4069a780952488fdde23a46205e0b..df7170fe75092e1dfbe1e0ca6cd35dcfc87cdb06 100644 --- a/sbroad/sbroad-core/src/ir/aggregates.rs +++ b/sbroad/sbroad-core/src/ir/aggregates.rs @@ -1,23 +1,31 @@ +use ahash::AHashMap; use smol_str::{format_smolstr, ToSmolStr}; use crate::errors::{Entity, SbroadError}; use crate::ir::expression::cast::Type; +use crate::ir::helpers::RepeatableState; use crate::ir::node::{NodeId, Reference, StableFunction}; use crate::ir::operator::Arithmetic; use crate::ir::relation::Type as RelType; use crate::ir::Plan; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fmt::{Display, Formatter}; +use std::hash::{Hash, Hasher}; use std::rc::Rc; -use super::expression::{ColumnPositionMap, FunctionFeature, Position}; +use super::expression::{ + ColumnPositionMap, Comparator, FunctionFeature, Position, EXPR_HASH_DEPTH, +}; +use super::function::{Behavior, Function}; use super::node::expression::Expression; +use super::node::relational::Relational; +use super::node::{Having, Projection}; use super::relation::DerivedType; use crate::frontend::sql::ir::SubtreeCloner; -/// The kind of aggregate function +/// The kind of aggregate function. /// -/// Examples: avg, sum, count, .. +/// Examples: avg, sum, count. #[derive(Clone, Debug, Hash, Eq, PartialEq, Copy)] pub enum AggregateKind { COUNT, @@ -45,23 +53,25 @@ impl Display for AggregateKind { } impl AggregateKind { + /// Returns None in case passed function name is not aggregate. 
#[must_use] - pub fn new(name: &str) -> Option<AggregateKind> { - let normalized = name.to_lowercase(); - match normalized.as_str() { - "count" => Some(AggregateKind::COUNT), - "sum" => Some(AggregateKind::SUM), - "avg" => Some(AggregateKind::AVG), - "total" => Some(AggregateKind::TOTAL), - "min" => Some(AggregateKind::MIN), - "max" => Some(AggregateKind::MAX), - "group_concat" | "string_agg" => Some(AggregateKind::GRCONCAT), - _ => None, - } + pub fn from_name(func_name: &str) -> Option<AggregateKind> { + let normalized = func_name.to_lowercase(); + let kind = match normalized.as_str() { + "count" => AggregateKind::COUNT, + "sum" => AggregateKind::SUM, + "avg" => AggregateKind::AVG, + "total" => AggregateKind::TOTAL, + "min" => AggregateKind::MIN, + "max" => AggregateKind::MAX, + "group_concat" | "string_agg" => AggregateKind::GRCONCAT, + _ => return None, + }; + Some(kind) } - #[inline(always)] - pub fn to_type(self, plan: &Plan, args: &[NodeId]) -> Result<DerivedType, SbroadError> { + /// Get type of the corresponding aggregate function. + pub fn get_type(self, plan: &Plan, args: &[NodeId]) -> Result<DerivedType, SbroadError> { let ty = match self { AggregateKind::COUNT => RelType::Unsigned, @@ -79,6 +89,9 @@ impl AggregateKind { Ok(DerivedType::new(ty)) } + /// Get aggregate functions that must be present on the local (Map) stage + /// of two stage aggregation in order to calculate given aggregate (`self`) + /// on the reduce stage. 
#[must_use] pub fn get_local_aggregates_kinds(&self) -> Vec<AggregateKind> { match self { @@ -92,34 +105,13 @@ impl AggregateKind { } } - /// Calculate argument type of aggregate function - /// - /// # Errors - /// - Invalid index - /// - Node doesn't exist in the plan - /// - Node is not an expression type - pub fn get_arg_type( - idx: usize, - plan: &Plan, - args: &[NodeId], - ) -> Result<DerivedType, SbroadError> { - let arg_id = *args.get(idx).ok_or(SbroadError::NotFound( - Entity::Index, - format_smolstr!("no element at index {idx} in args {args:?}"), - ))?; - let expr = plan.get_expression_node(arg_id)?; - expr.calculate_type(plan) - } - - /// Check agruments types of aggregate function - /// - /// # Errors - /// - Invlid plan/aggregate - /// - Invalid argument type - /// - /// # Panics - /// - Invalid argument count for aggregate + /// Check that aggregate function arguments have expected types. pub fn check_args_types(&self, plan: &Plan, args: &[NodeId]) -> Result<(), SbroadError> { + let get_arg_type = |idx: usize, args: &[NodeId]| { + let expr = plan.get_expression_node(args[idx])?; + expr.calculate_type(plan) + }; + let err = |arg_type: &RelType| -> Result<(), SbroadError> { Err(SbroadError::Invalid( Entity::Query, @@ -131,7 +123,7 @@ impl AggregateKind { }; match self { AggregateKind::SUM | AggregateKind::AVG | AggregateKind::TOTAL => { - let arg_type = Self::get_arg_type(0, plan, args)?; + let arg_type = get_arg_type(0, args)?; let Some(arg_type) = arg_type.get() else { return Ok(()); }; @@ -143,7 +135,7 @@ impl AggregateKind { } } AggregateKind::MIN | AggregateKind::MAX => { - let arg_type = Self::get_arg_type(0, plan, args)?; + let arg_type = get_arg_type(0, args)?; let Some(arg_type) = arg_type.get() else { return Ok(()); }; @@ -152,12 +144,12 @@ impl AggregateKind { } } AggregateKind::GRCONCAT => { - let arg_type_first = Self::get_arg_type(0, plan, args)?; + let arg_type_first = get_arg_type(0, args)?; let Some(first_type) = arg_type_first.get() 
else { return Ok(()); }; if args.len() == 2 { - let arg_type_second = Self::get_arg_type(1, plan, args)?; + let arg_type_second = get_arg_type(1, args)?; let Some(second_type) = arg_type_second.get() else { return Ok(()); }; @@ -180,10 +172,9 @@ impl AggregateKind { Ok(()) } - /// Get final aggregate corresponding to given local aggregate - /// - /// # Errors - /// - Invalid combination of this aggregate and local aggregate + /// Get final aggregate corresponding to given local aggregate. + /// 1) Checks that `local_aggregate` and final `self` aggregate corresponds to each other + /// 2) Gets type of final aggregate pub fn get_final_aggregate_kind( &self, local_aggregate: &AggregateKind, @@ -208,14 +199,21 @@ impl AggregateKind { } } -/// Helper struct for adding aggregates to ir -/// -/// This struct can be used for adding any Tarantool aggregate: -/// avg, sum, count, min, max, total -#[derive(Debug, Clone)] -pub struct SimpleAggregate { - /// The aggregate function being added, like COUNT +/// Pair of (aggregate kind, its position in the output). +pub(crate) type PositionKind = (Position, AggregateKind); + +/// Metadata about aggregates. +#[derive(Clone, Debug)] +pub struct Aggregate { + /// Id of Relational node in which this aggregate is located. + /// It can be located in `Projection`, `Having`, `OrderBy`. + pub parent_rel: NodeId, + /// Id of parent expression of aggregate function. + pub parent_expr: NodeId, + /// The aggregate function being added, like COUNT, SUM, etc. pub kind: AggregateKind, + /// "local aggregate aliases". + /// /// For non-distinct aggregate maps local aggregate kind to /// corresponding local alias. 
For distinct aggregate maps /// its aggregate kind to local alias used for corresponding @@ -233,79 +231,116 @@ pub struct SimpleAggregate { /// original query: `select avg(distinct b) from t` /// map query: `select b as l1 from t group by b)` /// map will contain: `avg` -> `l1` - pub lagg_alias: HashMap<AggregateKind, Rc<String>>, - /// id of aggregate function in IR + pub lagg_aliases: AHashMap<AggregateKind, Rc<String>>, + /// Id of aggregate function in plan. pub fun_id: NodeId, + /// Whether this aggregate was marked distinct in original user query + pub is_distinct: bool, } -#[cfg(not(feature = "mock"))] -#[must_use] -pub fn generate_local_alias_for_aggr(kind: &AggregateKind, suffix: &str) -> String { - format!( - "{}_{kind}_{suffix}", - uuid::Uuid::new_v4().as_simple().to_string() - ) -} - -#[cfg(feature = "mock")] -#[must_use] -pub fn generate_local_alias_for_aggr(kind: &AggregateKind, suffix: &str) -> String { - format!("{kind}_{suffix}") -} - -impl SimpleAggregate { +impl Aggregate { #[must_use] - pub fn new(name: &str, fun_id: NodeId) -> Option<SimpleAggregate> { - let kind = AggregateKind::new(name)?; - let laggr_alias: HashMap<AggregateKind, Rc<String>> = HashMap::new(); - let aggr = SimpleAggregate { + pub fn from_name( + name: &str, + fun_id: NodeId, + parent_rel: NodeId, + parent_expr: NodeId, + is_distinct: bool, + ) -> Option<Self> { + let kind = AggregateKind::from_name(name)?; + let aggr = Self { kind, fun_id, - lagg_alias: laggr_alias, + lagg_aliases: AHashMap::with_capacity(2), + parent_rel, + parent_expr, + is_distinct, }; Some(aggr) } -} - -pub(crate) type PositionKind = (Position, AggregateKind); -impl SimpleAggregate { pub(crate) fn get_position_kinds( &self, alias_to_pos: &ColumnPositionMap, - is_distinct: bool, ) -> Result<Vec<PositionKind>, SbroadError> { - if is_distinct { - let local_alias = self.lagg_alias.get(&self.kind).ok_or_else(|| { - SbroadError::Invalid( - Entity::Aggregate, - Some(format_smolstr!( - "missing local alias for 
distinct aggregate: {self:?}" - )), - ) - })?; - let position = alias_to_pos.get(local_alias)?; - Ok(vec![(position, self.kind)]) + let res = if self.is_distinct { + // For distinct aggregates kinds of + // local and final aggregates are the same. + let local_alias = self + .lagg_aliases + .get(&self.kind) + .expect("missing local alias for distinct aggregate: {self:?}"); + let pos = alias_to_pos.get(local_alias)?; + vec![(pos, self.kind)] } else { let aggr_kinds = self.kind.get_local_aggregates_kinds(); let mut res = Vec::with_capacity(aggr_kinds.len()); for aggr_kind in aggr_kinds { - let local_alias = self.lagg_alias.get(&aggr_kind).ok_or_else(|| { - SbroadError::Invalid( - Entity::Aggregate, - Some(format_smolstr!( - "missing local alias for local aggregate ({aggr_kind}): {self:?}" - )), - ) - })?; - let position = alias_to_pos.get(local_alias)?; - res.push((position, aggr_kind)); + let local_alias = self + .lagg_aliases + .get(&aggr_kind) + .expect("missing local alias for local aggregate ({aggr_kind}): {self:?}"); + let pos = alias_to_pos.get(local_alias)?; + res.push((pos, aggr_kind)); } - Ok(res) - } + res + }; + Ok(res) } - /// Create final aggregate expression and return its id + fn create_final_aggr( + &self, + plan: &mut Plan, + position: Position, + final_kind: AggregateKind, + ) -> Result<NodeId, SbroadError> { + let fun_expr = plan.get_expression_node(self.fun_id)?; + let col_type = fun_expr.calculate_type(plan)?; + + let ref_node = Reference { + parent: Some(self.parent_rel), + // Final node has only one required child. + targets: Some(vec![0]), + position, + col_type, + asterisk_source: None, + }; + let ref_id = plan.nodes.push(ref_node.into()); + let children: Vec<NodeId> = match self.kind { + AggregateKind::AVG => vec![plan.add_cast(ref_id, Type::Double)?], + AggregateKind::GRCONCAT => { + let Expression::StableFunction(StableFunction { children, .. }) = + plan.get_expression_node(self.fun_id)? 
+ else { + unreachable!("Aggregate should reference expression by fun_id") + }; + + if let Some(delimiter_id) = children.get(1) { + vec![ref_id, SubtreeCloner::clone_subtree(plan, *delimiter_id)?] + } else { + vec![ref_id] + } + } + _ => vec![ref_id], + }; + let feature = if self.is_distinct { + Some(FunctionFeature::Distinct) + } else { + None + }; + let func_type = self.kind.get_type(plan, &children)?; + let final_aggr = StableFunction { + name: final_kind.to_smolstr(), + children, + feature, + func_type, + is_system: true, + }; + let aggr_id = plan.nodes.push(final_aggr.into()); + Ok(aggr_id) + } + + /// Create final aggregate expression and return its id. /// /// # Examples /// Suppose this aggregate is non-distinct `AVG` and at local stage @@ -325,121 +360,297 @@ impl SimpleAggregate { /// ```txt /// avg(column_1) /// ``` - /// - /// # Errors - /// - Invalid aggregate - /// - Could not find local alias position in child output #[allow(clippy::too_many_lines)] pub(crate) fn create_final_aggregate_expr( &self, - parent: NodeId, plan: &mut Plan, - fun_type: DerivedType, - mut position_kinds: Vec<PositionKind>, - is_distinct: bool, + position_kinds: Vec<PositionKind>, ) -> Result<NodeId, SbroadError> { - // map local AggregateKind to finalised expression of that aggregate - let mut final_aggregates: HashMap<AggregateKind, NodeId> = HashMap::new(); - let mut create_final_aggr = |position: Position, - local_kind: AggregateKind, - final_func: AggregateKind| - -> Result<(), SbroadError> { - let ref_node = Reference { - parent: Some(parent), - // projection has only one child - targets: Some(vec![0]), - position, - col_type: fun_type, - asterisk_source: None, - }; - let ref_id = plan.nodes.push(ref_node.into()); - let children = match self.kind { - AggregateKind::AVG => vec![plan.add_cast(ref_id, Type::Double)?], - AggregateKind::GRCONCAT => { - if let Expression::StableFunction(StableFunction { children, .. }) = - plan.get_expression_node(self.fun_id)? 
- { - if children.len() > 1 { - let second_arg = { - let a = *children - .get(1) - .ok_or(SbroadError::Invalid(Entity::Aggregate, None))?; - SubtreeCloner::clone_subtree(plan, a)? - }; - vec![ref_id, second_arg] - } else { - vec![ref_id] - } - } else { - return Err(SbroadError::Invalid( - Entity::Aggregate, - Some(format_smolstr!( - "fun_id ({:?}) points to other expression node", - self.fun_id - )), - )); - } - } - _ => vec![ref_id], - }; - let feature = if is_distinct { - Some(FunctionFeature::Distinct) - } else { - None - }; - let func_type = self.kind.to_type(plan, &children)?; - let final_aggr = StableFunction { - name: final_func.to_smolstr(), - children, - feature, - func_type, - is_system: true, - }; - let aggr_id = plan.nodes.push(final_aggr.into()); - final_aggregates.insert(local_kind, aggr_id); - Ok(()) - }; - if is_distinct { - let (position, kind) = position_kinds.drain(..).next().ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues("position kinds are empty".to_smolstr()) - })?; - create_final_aggr(position, kind, self.kind)?; + // Map of {local AggregateKind -> finalized expression of that aggregate}. + let mut final_aggregates: HashMap<AggregateKind, NodeId> = + HashMap::with_capacity(AGGR_CAPACITY); + + if self.is_distinct { + // For distinct aggregates kinds of local and final aggregates are the same. 
+ let (position, local_kind) = position_kinds + .first() + .expect("Distinct aggregate should have the only position kind"); + let aggr_id = self.create_final_aggr(plan, *position, self.kind)?; + final_aggregates.insert(*local_kind, aggr_id); } else { - for (position, kind) in position_kinds { - let final_aggregate_kind = self.kind.get_final_aggregate_kind(&kind)?; - create_final_aggr(position, kind, final_aggregate_kind)?; + for (position, local_kind) in position_kinds { + let final_aggregate_kind = self.kind.get_final_aggregate_kind(&local_kind)?; + let aggr_id = self.create_final_aggr(plan, position, final_aggregate_kind)?; + final_aggregates.insert(local_kind, aggr_id); } } + let final_expr_id = if final_aggregates.len() == 1 { - *final_aggregates.values().next().ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues("final_aggregates is empty".into()) - })? + *final_aggregates.values().next().unwrap() } else { match self.kind { AggregateKind::AVG => { - let sum_aggr = *final_aggregates.get(&AggregateKind::SUM).ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues( - "final_aggregates: missing final aggregate for SUM".into(), - ) - })?; - let count_aggr = - *final_aggregates.get(&AggregateKind::COUNT).ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues( - "final_aggregates: missing final aggregate for COUNT".into(), - ) - })?; + let sum_aggr = *final_aggregates + .get(&AggregateKind::SUM) + .expect("SUM aggregate expr should exist for final AVG"); + let count_aggr = *final_aggregates + .get(&AggregateKind::COUNT) + .expect("COUNT aggregate expr should exist for final AVG"); plan.add_arithmetic_to_plan(sum_aggr, Arithmetic::Divide, count_aggr)? 
} _ => { - return Err(SbroadError::Unsupported( - Entity::Aggregate, - Some(format_smolstr!( - "aggregate with multiple final aggregates: {self:?}" - )), - )) + unreachable!("The only aggregate with multiple final aggregates is AVG") } } }; Ok(final_expr_id) } } + +/// Capacity for the vec of aggregates which we expect to extract +/// from final nodes like Projection and Having. +const AGGR_CAPACITY: usize = 10; + +/// Helper struct to find aggregates in expressions of finals. +struct AggrCollector<'plan> { + /// Id of final node in which matches are searched. + parent_rel: NodeId, + /// Collected aggregates. + aggrs: Vec<Aggregate>, + plan: &'plan Plan, +} + +impl<'plan> AggrCollector<'plan> { + pub fn with_capacity( + plan: &'plan Plan, + capacity: usize, + parent_rel: NodeId, + ) -> AggrCollector<'plan> { + AggrCollector { + aggrs: Vec::with_capacity(capacity), + parent_rel, + plan, + } + } + + /// Collect aggregates in internal field by traversing expression tree `top` + /// + /// # Arguments + /// * `top` - id of expression root in which to look for aggregates + /// * `parent_rel` - id of parent relational node, where `top` is located. It is used to + /// create `AggrInfo` + pub fn collect_aggregates(&mut self, top: NodeId) -> Result<Vec<Aggregate>, SbroadError> { + self.find(top, None)?; + Ok(std::mem::take(&mut self.aggrs)) + } + + fn find(&mut self, current: NodeId, parent_expr: Option<NodeId>) -> Result<(), SbroadError> { + let expr = self.plan.get_expression_node(current)?; + if let Expression::StableFunction(StableFunction { name, feature, .. 
}) = expr { + let is_distinct = matches!(feature, Some(FunctionFeature::Distinct)); + let parent_expr = parent_expr.expect( + "Aggregate stable function under final relational node should have a parent expr", + ); + if let Some(aggr) = + Aggregate::from_name(name, current, self.parent_rel, parent_expr, is_distinct) + { + self.aggrs.push(aggr); + return Ok(()); + }; + } + for child in self.plan.nodes.expr_iter(current, false) { + self.find(child, Some(current))?; + } + Ok(()) + } +} + +/// Helper struct to filter duplicate aggregates in local stage. +/// +/// Consider user query: `select sum(a), avg(a) from t` +/// at local stage we need to compute `sum(a)` only once. +/// +/// This struct contains info needed to compute hash and compare aggregates +/// used at local stage. +struct AggregateSignature<'plan> { + pub kind: AggregateKind, + /// Ids of expressions used as arguments to aggregate. + pub arguments: Vec<NodeId>, + pub plan: &'plan Plan, + /// Local alias of this local aggregate. + pub local_alias: Rc<String>, +} + +impl Hash for AggregateSignature<'_> { + fn hash<H: Hasher>(&self, state: &mut H) { + self.kind.hash(state); + let mut comp = Comparator::new(self.plan); + comp.set_hasher(state); + for arg in &self.arguments { + comp.hash_for_expr(*arg, EXPR_HASH_DEPTH); + } + } +} + +impl PartialEq<Self> for AggregateSignature<'_> { + fn eq(&self, other: &Self) -> bool { + let comparator = Comparator::new(self.plan); + self.kind == other.kind + && self + .arguments + .iter() + .zip(other.arguments.iter()) + .all(|(l, r)| comparator.are_subtrees_equal(*l, *r).unwrap_or(false)) + } +} + +impl Eq for AggregateSignature<'_> {} + +fn aggr_local_alias(kind: AggregateKind, index: usize) -> String { + format!("{kind}_{index}") +} + +impl Plan { + /// Collect information about aggregates. + /// + /// Aggregates can appear in `Projection`, `Having`. + /// TODO: We should also support OrderBy. 
+ /// + /// # Arguments + /// * `finals` - ids of nodes in final (reduce stage) before adding two stage aggregation. + /// It may contain ids of `Projection`, `Having` or `NamedWindows`. + /// Note: final `GroupBy` is not present because it will be added later in 2-stage pipeline. + pub fn collect_aggregates(&self, finals: &Vec<NodeId>) -> Result<Vec<Aggregate>, SbroadError> { + let mut aggrs = Vec::with_capacity(AGGR_CAPACITY); + for node_id in finals { + let node = self.get_relation_node(*node_id)?; + match node { + Relational::Projection(Projection { output, .. }) => { + let mut collector = AggrCollector::with_capacity(self, AGGR_CAPACITY, *node_id); + for col in self.get_row_list(*output)? { + aggrs.extend(collector.collect_aggregates(*col)?); + } + } + Relational::Having(Having { filter, .. }) => { + let mut collector = AggrCollector::with_capacity(self, AGGR_CAPACITY, *node_id); + aggrs.extend(collector.collect_aggregates(*filter)?); + } + _ => { + unreachable!( + "Unexpected {node:?} met as final relational to collect aggregates" + ) + } + } + } + + for aggr in &aggrs { + let top = aggr.fun_id; + if self.contains_aggregates(top, false)? 
{ + return Err(SbroadError::Invalid( + Entity::Query, + Some("aggregate functions inside aggregate function are not allowed.".into()), + )); + } + } + + Ok(aggrs) + } + + pub fn create_local_aggregate( + &mut self, + kind: AggregateKind, + arguments: &[NodeId], + local_alias: &str, + ) -> Result<NodeId, SbroadError> { + let fun: Function = Function { + name: kind.to_smolstr(), + behavior: Behavior::Stable, + func_type: kind.get_type(self, arguments)?, + is_system: true, + }; + // We can reuse aggregate expression between local aggregates, because + // all local aggregates are located inside the same motion subtree and we + // assume that each local aggregate does not need to modify its expression + let local_fun_id = self.add_stable_function(&fun, arguments.to_vec(), None)?; + let alias_id = self.nodes.add_alias(local_alias, local_fun_id)?; + Ok(alias_id) + } + + /// Adds aggregates columns in `output_cols` for local `Projection` + /// + /// This function collects local aggregates from each `Aggregate`, + /// then it removes duplicates from them using `AggregateSignature`. + /// Next, it creates for each unique aggregate local alias and column. + #[allow(clippy::mutable_key_type)] + pub fn add_local_aggregates( + &mut self, + aggrs: &mut [Aggregate], + output_cols: &mut Vec<NodeId>, + ) -> Result<(), SbroadError> { + let mut local_alias_index = 1; + + // Aggregate expressions can appear in `Projection`, `Having`, `OrderBy`, if the + // same expression appears in different places, we must not calculate it separately: + // `select sum(a) from t group by b having sum(a) > 10` + // Here `sum(a)` appears both in projection and having, so we need to calculate it only once. 
+ let mut unique_local_aggregates: HashSet<AggregateSignature, RepeatableState> = + HashSet::with_hasher(RepeatableState); + for pos in 0..aggrs.len() { + let (final_kind, arguments, aggr_kinds) = { + let aggr: &Aggregate = aggrs.get(pos).unwrap(); + if aggr.is_distinct { + continue; + } + + let Expression::StableFunction(StableFunction { + children: arguments, + .. + }) = self.get_expression_node(aggr.fun_id)? + else { + unreachable!("Aggregate should reference StableFunction by fun_id") + }; + + ( + aggr.kind, + arguments.clone(), + aggr.kind.get_local_aggregates_kinds(), + ) + }; + + for kind in aggr_kinds { + let local_alias = Rc::new(aggr_local_alias(final_kind, local_alias_index)); + + let signature = AggregateSignature { + kind, + arguments: arguments.clone(), + plan: self, + local_alias: local_alias.clone(), + }; + if let Some(sig) = unique_local_aggregates.get(&signature) { + let aggr: &mut Aggregate = aggrs.get_mut(pos).unwrap(); + aggr.lagg_aliases.insert(kind, sig.local_alias.clone()); + } else { + let aggr = aggrs.get_mut(pos).unwrap(); + + // New aggregate was really added. + local_alias_index += 1; + aggr.lagg_aliases.insert(kind, local_alias.clone()); + unique_local_aggregates.insert(signature); + } + } + } + + type LocalAggregate = (AggregateKind, Vec<NodeId>, Rc<String>); + // Add non-distinct aggregates to local projection. 
+ let local_aggregates: Vec<LocalAggregate> = unique_local_aggregates + .into_iter() + .map(|x| (x.kind, x.arguments.clone(), x.local_alias.clone())) + .collect(); + for (kind, arguments, local_alias) in local_aggregates { + let alias_id = self.create_local_aggregate(kind, &arguments, local_alias.as_str())?; + output_cols.push(alias_id); + } + + Ok(()) + } +} diff --git a/sbroad/sbroad-core/src/ir/distribution/tests.rs b/sbroad/sbroad-core/src/ir/distribution/tests.rs index ad36a4dafb8e8482c1749b12a48767e9a88c77d9..9ebe6d4ed2a229815c6ae2b4033fab2b9a156aff 100644 --- a/sbroad/sbroad-core/src/ir/distribution/tests.rs +++ b/sbroad/sbroad-core/src/ir/distribution/tests.rs @@ -66,9 +66,9 @@ fn projection_any_dist_for_expr() { // check explain first insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("count_696"::unsigned))::unsigned -> "col_1") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1") motion [policy: full] - projection (count(("test_space"."id"::unsigned))::unsigned -> "count_696") + projection (count(("test_space"."id"::unsigned))::unsigned -> "count_1") scan "test_space" execution options: sql_vdbe_opcode_max = 45000 diff --git a/sbroad/sbroad-core/src/ir/explain/tests.rs b/sbroad/sbroad-core/src/ir/explain/tests.rs index 7e8d49485a11e6fb9cd1f6713c5611029e8ea434..af8041da1c68507932988b861e3c1b3384b02f25 100644 --- a/sbroad/sbroad-core/src/ir/explain/tests.rs +++ b/sbroad/sbroad-core/src/ir/explain/tests.rs @@ -13,7 +13,6 @@ fn simple_query_without_cond_plan() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t"."identification_number"::integer -> "c1", "t"."product_code"::string -> "product_code") scan "hash_testing" -> "t" @@ -32,7 +31,6 @@ fn simple_query_with_cond_plan() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); 
- let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t"."identification_number"::integer -> "c1", "t"."product_code"::string -> "product_code") selection (ROW("t"."identification_number"::integer) = ROW(1::unsigned)) and (ROW("t"."product_code"::string) = ROW('222'::string)) @@ -82,7 +80,6 @@ WHERE "id" = 1"#; let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t"."id"::unsigned -> "id", "t"."FIRST_NAME"::string -> "FIRST_NAME") selection ROW("t"."id"::unsigned) = ROW(1::unsigned) @@ -121,7 +118,6 @@ WHERE "id" IN (SELECT "id" let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t"."id"::unsigned -> "id", "t"."FIRST_NAME"::string -> "FIRST_NAME") selection ROW("t"."id"::unsigned) in ROW($0) @@ -203,7 +199,6 @@ fn motion_subquery_plan() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t"."id"::unsigned -> "id", "t"."FIRST_NAME"::string -> "FIRST_NAME") selection (ROW("t"."id"::unsigned) in ROW($1)) or (ROW("t"."id"::unsigned) in ROW($0)) @@ -251,7 +246,6 @@ WHERE "t2"."product_code" = '123'"#; let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t1"."FIRST_NAME"::string -> "FIRST_NAME") selection ROW("t2"."product_code"::string) = ROW('123'::string) @@ -281,7 +275,6 @@ FROM (SELECT "id", "FIRST_NAME" FROM "test_space" WHERE "id" = 3) as "t1" let top = &plan.get_top().unwrap(); let explain_tree = 
FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("t1"."FIRST_NAME"::string -> "FIRST_NAME") join on ROW("t1"."id"::unsigned) = ROW($0) @@ -313,7 +306,6 @@ fn unary_condition_plan() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("test_space"."id"::unsigned -> "id", "test_space"."FIRST_NAME"::string -> "FIRST_NAME") selection (ROW("test_space"."id"::unsigned) is null) and (not (ROW("test_space"."FIRST_NAME"::string) is null)) @@ -408,7 +400,6 @@ fn select_value_plan() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("COLUMN_1"::unsigned -> "COLUMN_1") scan @@ -429,7 +420,6 @@ fn select_cast_plan1() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("test_space"."id"::unsigned::unsigned -> "b") scan "test_space" @@ -448,7 +438,6 @@ fn select_cast_plan2() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection ("test_space"."id"::unsigned -> "id", "test_space"."FIRST_NAME"::string -> "FIRST_NAME") selection ROW("test_space"."id"::unsigned::int) = ROW(1::unsigned) @@ -468,7 +457,6 @@ fn select_cast_plan_nested() { let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); - let mut actual_explain = String::new(); insta::assert_snapshot!(explain_tree.to_string(), @r#" projection 
("func"(("test_space"."id"::unsigned))::integer::string -> "col_1") scan "test_space" diff --git a/sbroad/sbroad-core/src/ir/explain/tests/cast_constants.rs b/sbroad/sbroad-core/src/ir/explain/tests/cast_constants.rs index 1e705209d8c6e420f85193f10077c2c1409d0858..b2d162bd280d1b65162b56486ef09c7fb291c0b8 100644 --- a/sbroad/sbroad-core/src/ir/explain/tests/cast_constants.rs +++ b/sbroad/sbroad-core/src/ir/explain/tests/cast_constants.rs @@ -1,5 +1,4 @@ use crate::executor::{engine::mock::RouterRuntimeMock, Query}; -use pretty_assertions::assert_eq; #[test] fn select_values_rows() { diff --git a/sbroad/sbroad-core/src/ir/explain/tests/concat.rs b/sbroad/sbroad-core/src/ir/explain/tests/concat.rs index a6d5547b7dd0cb008cf1e55cf98dae51700d87e8..7bac576cdb1b63f54d8cd28dff17f0f9c269971b 100644 --- a/sbroad/sbroad-core/src/ir/explain/tests/concat.rs +++ b/sbroad/sbroad-core/src/ir/explain/tests/concat.rs @@ -4,8 +4,6 @@ use super::*; fn concat1_test() { let sql = r#"SELECT CAST('1' as string) || 'hello' FROM "t1""#; let plan = sql_to_optimized_ir(sql, vec![]); - let top = &plan.get_top().unwrap(); - let explain_tree = FullExplain::new(&plan, *top).unwrap(); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection (ROW('1'::string) || ROW('hello'::string) -> "col_1") scan "t1" @@ -19,8 +17,6 @@ fn concat1_test() { fn concat2_test() { let sql = r#"SELECT "a" FROM "t1" WHERE CAST('1' as string) || FUNC('hello') || '2' = 42"#; let plan = sql_to_optimized_ir(sql, vec![]); - let top = &plan.get_top().unwrap(); - let explain_tree = FullExplain::new(&plan, *top).unwrap(); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" projection ("t1"."a"::string -> "a") selection ROW(ROW(ROW('1'::string) || ROW("func"(('hello'::string))::integer)) || ROW('2'::string)) = ROW(42::unsigned) diff --git a/sbroad/sbroad-core/src/ir/explain/tests/delete.rs b/sbroad/sbroad-core/src/ir/explain/tests/delete.rs index 
49a2766ae5dec03df47b7d5a31e446b322412bb7..b18756ab084ef073703a0a4ad38939088cd7b5dc 100644 --- a/sbroad/sbroad-core/src/ir/explain/tests/delete.rs +++ b/sbroad/sbroad-core/src/ir/explain/tests/delete.rs @@ -4,8 +4,6 @@ use super::*; fn delete1_test() { let sql = r#"DELETE FROM "t1""#; let plan = sql_to_optimized_ir(sql, vec![]); - let top = &plan.get_top().unwrap(); - let explain_tree = FullExplain::new(&plan, *top).unwrap(); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" delete "t1" execution options: @@ -18,8 +16,6 @@ fn delete1_test() { fn delete2_test() { let sql = r#"DELETE FROM "t1" where "a" > 3"#; let plan = sql_to_optimized_ir(sql, vec![]); - let top = &plan.get_top().unwrap(); - let explain_tree = FullExplain::new(&plan, *top).unwrap(); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" delete "t1" motion [policy: local] @@ -36,8 +32,6 @@ fn delete2_test() { fn delete3_test() { let sql = r#"DELETE FROM "t1" where "a" in (SELECT "b" from "t1")"#; let plan = sql_to_optimized_ir(sql, vec![]); - let top = &plan.get_top().unwrap(); - let explain_tree = FullExplain::new(&plan, *top).unwrap(); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" delete "t1" motion [policy: local] diff --git a/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs b/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs index adbeda78c1e6aa2bc5f06bf0077bcd9d78be2324..3fd44b9471fe8699ae05daa8a0b8741619e8facb 100644 --- a/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs +++ b/sbroad/sbroad-core/src/ir/explain/tests/query_explain.rs @@ -1,6 +1,3 @@ -use pretty_assertions::assert_eq; -use smol_str::ToSmolStr; - use crate::executor::{engine::mock::RouterRuntimeMock, Query}; #[test] @@ -58,9 +55,9 @@ fn test_query_explain_4() { let metadata = &RouterRuntimeMock::new(); let mut query = Query::new(metadata, sql, vec![]).unwrap(); insta::assert_snapshot!(query.to_explain().unwrap(), @r#" - projection (sum(("count_596"::unsigned))::unsigned -> "col_1") + 
projection (sum(("count_1"::unsigned))::unsigned -> "col_1") motion [policy: full] - projection (count((*::integer))::unsigned -> "count_596") + projection (count((*::integer))::unsigned -> "count_1") scan "t2" execution options: sql_vdbe_opcode_max = 45000 @@ -193,10 +190,10 @@ fn test_query_explain_11() { let metadata = &RouterRuntimeMock::new(); let mut query = Query::new(metadata, sql, vec![]).unwrap(); insta::assert_snapshot!(query.to_explain().unwrap(), @r#" - projection ("column_3496"::string -> "a", sum(("count_4196"::unsigned))::unsigned -> "col_1") - group by ("column_3496"::string) output: ("column_3496"::string -> "column_3496", "count_4196"::unsigned -> "count_4196") - motion [policy: segment([ref("column_3496")])] - projection ("a"::string -> "column_3496", count(("b"::integer))::unsigned -> "count_4196") + projection ("gr_expr_1"::string -> "a", sum(("count_1"::unsigned))::unsigned -> "col_1") + group by ("gr_expr_1"::string) output: ("gr_expr_1"::string -> "gr_expr_1", "count_1"::unsigned -> "count_1") + motion [policy: segment([ref("gr_expr_1")])] + projection ("a"::string -> "gr_expr_1", count(("b"::integer))::unsigned -> "count_1") group by ("a"::string) output: ("e"::unsigned -> "e", "f"::unsigned -> "f", "a"::string -> "a", "b"::integer -> "b") join on ROW("e"::unsigned) = ROW("a"::string) scan diff --git a/sbroad/sbroad-core/src/ir/function.rs b/sbroad/sbroad-core/src/ir/function.rs index 79509a861de6a6cc9e697e5232fc683fa1d8ab17..bf4c3f66e0390df64bf94fd455d288bb83925c9a 100644 --- a/sbroad/sbroad-core/src/ir/function.rs +++ b/sbroad/sbroad-core/src/ir/function.rs @@ -54,10 +54,6 @@ impl Function { impl Plan { /// Adds a stable function to the plan. - /// - /// # Errors - /// - Function is not stable. - /// - Function is not found in the plan. 
pub fn add_stable_function( &mut self, function: &Function, @@ -82,12 +78,6 @@ impl Plan { } /// Add aggregate function to plan - /// - /// # Errors - /// - Invalid arguments for given aggregate function - /// - /// # Panics - /// - never pub fn add_aggregate_function( &mut self, function: &str, @@ -134,7 +124,7 @@ impl Plan { }; let func_expr = StableFunction { name: function.to_lowercase().to_smolstr(), - func_type: kind.to_type(self, &children)?, + func_type: kind.get_type(self, &children)?, children, feature, is_system: true, diff --git a/sbroad/sbroad-core/src/ir/helpers.rs b/sbroad/sbroad-core/src/ir/helpers.rs index 42cf8f19c0ea04910ba47c54e6c329a2d0636595..6b22f8f06277dfc9f34dac08418b08f8f1220b87 100644 --- a/sbroad/sbroad-core/src/ir/helpers.rs +++ b/sbroad/sbroad-core/src/ir/helpers.rs @@ -389,10 +389,8 @@ impl Plan { writeln_with_tabulation(buf, tabulation_number + 1, "Filter")?; self.formatted_arena_node(buf, tabulation_number + 1, *filter)?; } - Relational::GroupBy(GroupBy { - gr_exprs, is_final, .. - }) => { - writeln!(buf, "GroupBy [is_final = {is_final}]")?; + Relational::GroupBy(GroupBy { gr_exprs, .. 
}) => { + writeln!(buf, "GroupBy")?; writeln_with_tabulation(buf, tabulation_number + 1, "Gr_cols:")?; for expr_id in gr_exprs { let expr = self.get_expression_node(*expr_id); diff --git a/sbroad/sbroad-core/src/ir/helpers/tests.rs b/sbroad/sbroad-core/src/ir/helpers/tests.rs index daa4424840245b7a5269a143219a544f5f084982..845ff83b0683ca94c975f3434dcca6f2fcc56685 100644 --- a/sbroad/sbroad-core/src/ir/helpers/tests.rs +++ b/sbroad/sbroad-core/src/ir/helpers/tests.rs @@ -259,7 +259,7 @@ fn simple_aggregation_with_group_by() { [id: 432] expression: Alias [name = bucket_id, child = Reference(Reference { parent: Some(NodeId { offset: 1, arena_type: Arena64 }), targets: None, position: 4, col_type: DerivedType(Some(Unsigned)), asterisk_source: None })] --------------------------------------------- --------------------------------------------- -[id: 364] relation: GroupBy [is_final = false] +[id: 364] relation: GroupBy Gr_cols: Gr_col: Reference(Reference { parent: Some(NodeId { offset: 3, arena_type: Arena64 }), targets: Some([0]), position: 1, col_type: DerivedType(Some(String)), asterisk_source: None }) Children: @@ -280,7 +280,7 @@ fn simple_aggregation_with_group_by() { Output_id: 664 [id: 664] expression: Row [distribution = Some(Any)] List: - [id: 1132] expression: Alias [name = column_596, child = Reference(Reference { parent: Some(NodeId { offset: 3, arena_type: Arena64 }), targets: Some([0]), position: 1, col_type: DerivedType(Some(String)), asterisk_source: None })] + [id: 1132] expression: Alias [name = gr_expr_1, child = Reference(Reference { parent: Some(NodeId { offset: 7, arena_type: Arena64 }), targets: Some([0]), position: 1, col_type: DerivedType(Some(String)), asterisk_source: None })] --------------------------------------------- --------------------------------------------- [id: 0136] relation: Motion [policy = Segment(MotionKey { targets: [Reference(0)] }), alias = None] @@ -289,10 +289,10 @@ fn simple_aggregation_with_group_by() { Output_id: 1064 
[id: 1064] expression: Row [distribution = Some(Segment { keys: KeySet({Key { positions: [0] }}) })] List: - [id: 1332] expression: Alias [name = column_596, child = Reference(Reference { parent: Some(NodeId { offset: 0, arena_type: Arena136 }), targets: Some([0]), position: 0, col_type: DerivedType(Some(String)), asterisk_source: None })] + [id: 1332] expression: Alias [name = gr_expr_1, child = Reference(Reference { parent: Some(NodeId { offset: 0, arena_type: Arena136 }), targets: Some([0]), position: 0, col_type: DerivedType(Some(String)), asterisk_source: None })] --------------------------------------------- --------------------------------------------- -[id: 964] relation: GroupBy [is_final = true] +[id: 964] relation: GroupBy Gr_cols: Gr_col: Reference(Reference { parent: Some(NodeId { offset: 9, arena_type: Arena64 }), targets: Some([0]), position: 0, col_type: DerivedType(Some(String)), asterisk_source: None }) Children: @@ -300,7 +300,7 @@ fn simple_aggregation_with_group_by() { Output_id: 864 [id: 864] expression: Row [distribution = Some(Segment { keys: KeySet({Key { positions: [0] }}) })] List: - [id: 1232] expression: Alias [name = column_596, child = Reference(Reference { parent: Some(NodeId { offset: 9, arena_type: Arena64 }), targets: Some([0]), position: 0, col_type: DerivedType(Some(String)), asterisk_source: None })] + [id: 1232] expression: Alias [name = gr_expr_1, child = Reference(Reference { parent: Some(NodeId { offset: 9, arena_type: Arena64 }), targets: Some([0]), position: 0, col_type: DerivedType(Some(String)), asterisk_source: None })] --------------------------------------------- --------------------------------------------- [id: 564] relation: Projection diff --git a/sbroad/sbroad-core/src/ir/node.rs b/sbroad/sbroad-core/src/ir/node.rs index 453f81d71bba42af97b8f00510cb90545f90f41a..a885eb8654f781145c9d7a31221da35f2e9b902c 100644 --- a/sbroad/sbroad-core/src/ir/node.rs +++ b/sbroad/sbroad-core/src/ir/node.rs @@ -653,11 +653,14 @@ 
impl From<Selection> for NodeAligned { #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] pub struct GroupBy { - /// The first child is a relational operator before group by + /// The first child is a + /// * Scan in case it's local GroupBy + /// * Motion with policy Segment in case two stage aggregation was applied + /// + /// Other children are subqueries used under grouping expressions. pub children: Vec<NodeId>, pub gr_exprs: Vec<NodeId>, pub output: NodeId, - pub is_final: bool, } impl From<GroupBy> for NodeAligned { diff --git a/sbroad/sbroad-core/src/ir/node/expression.rs b/sbroad/sbroad-core/src/ir/node/expression.rs index ab88abcb8d1559aa0c6da36cc01a8bdf3ef07e9b..53b302e00266abc379eac0daeb9705a265fdc8db 100644 --- a/sbroad/sbroad-core/src/ir/node/expression.rs +++ b/sbroad/sbroad-core/src/ir/node/expression.rs @@ -153,7 +153,7 @@ impl Expression<'_> { #[must_use] pub fn is_aggregate_name(name: &str) -> bool { // currently we support only simple aggregates - AggregateKind::new(name).is_some() + AggregateKind::from_name(name).is_some() } #[must_use] diff --git a/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs b/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs index e1ee9248d2ac8000aa8e9d5f872ac536c5a0ca08..fffb5b7cc93e668a1d0f1e3ed1b9e9183d2bddcc 100644 --- a/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs +++ b/sbroad/sbroad-core/src/ir/transformation/redistribution/groupby.rs @@ -1,134 +1,61 @@ +use ahash::AHashMap; use smol_str::{format_smolstr, ToSmolStr}; use crate::errors::{Entity, SbroadError}; use crate::executor::engine::helpers::to_user; use crate::frontend::sql::ir::SubtreeCloner; -use crate::ir::aggregates::{generate_local_alias_for_aggr, AggregateKind, SimpleAggregate}; +use crate::ir::aggregates::Aggregate; use crate::ir::distribution::Distribution; -use crate::ir::expression::{ColumnPositionMap, Comparator, FunctionFeature, EXPR_HASH_DEPTH}; +use 
crate::ir::expression::{ColumnPositionMap, Comparator, EXPR_HASH_DEPTH}; use crate::ir::node::expression::Expression; use crate::ir::node::relational::{MutRelational, Relational}; -use crate::ir::node::{ - Alias, ArenaType, GroupBy, Having, NodeId, Projection, Reference, StableFunction, -}; +use crate::ir::node::{Alias, ArenaType, GroupBy, Having, NodeId, Projection, Reference}; use crate::ir::transformation::redistribution::{ MotionKey, MotionPolicy, Program, Strategy, Target, }; -use crate::ir::tree::traversal::{BreadthFirst, PostOrderWithFilter, EXPR_CAPACITY}; +use crate::ir::tree::traversal::{PostOrder, PostOrderWithFilter, EXPR_CAPACITY}; use crate::ir::{Node, Plan}; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; -use crate::ir::function::{Behavior, Function}; use crate::ir::helpers::RepeatableState; -use crate::utils::{OrderedMap, OrderedSet}; +use crate::utils::OrderedMap; use std::hash::{Hash, Hasher}; use std::rc::Rc; -const AGGR_CAPACITY: usize = 10; - -/// Helper struct to store metadata about aggregates -#[derive(Clone, Debug)] -struct AggrInfo { - /// id of Relational node in which this aggregate is located. - /// It can be located in `Projection`, `Having`, `OrderBy` - parent_rel: NodeId, - /// id of parent expression of aggregate function, - /// if there is no parent it's `None` - parent_expr: Option<NodeId>, - /// info about what aggregate it is: sum, count, ... - aggr: SimpleAggregate, - /// whether this aggregate was marked distinct in original user query - is_distinct: bool, -} - -/// Helper struct to find aggregates in expressions of finals -struct AggrCollector<'plan> { - /// id of final node in which matches are searched - parent_rel: Option<NodeId>, - /// collected aggregates - infos: Vec<AggrInfo>, - plan: &'plan Plan, -} - /// Helper struct to hold information about /// location of grouping expressions used in /// nodes other than `GroupBy`. 
/// -/// For example grouping expressions can appear -/// in `Projection`, `Having`, `OrderBy` -struct ExpressionLocationIds { - pub parent_expr: Option<NodeId>, - pub expr: NodeId, - pub rel: NodeId, +/// E.g. for query `select 1 + a from t group by a` +/// location for grouping expression `a` will look like +/// { +/// `expr`: id of a under sum expr, +/// `parent_expr`: Some(id of sum expr), +/// `rel`: Projection +/// } +#[derive(Debug, Clone)] +struct ExpressionLocationId { + /// Id of grouping expression. + pub expr_id: NodeId, + /// Id of expression which is a parent of `expr`. + pub parent_expr_id: Option<NodeId>, + /// Relational node in which this `expr` is used. + pub rel_id: NodeId, } -impl ExpressionLocationIds { +impl ExpressionLocationId { pub fn new(expr_id: NodeId, parent_expr_id: Option<NodeId>, rel_id: NodeId) -> Self { - ExpressionLocationIds { - parent_expr: parent_expr_id, - expr: expr_id, - rel: rel_id, + ExpressionLocationId { + parent_expr_id, + expr_id, + rel_id, } } } -/// Helper struct to filter duplicate aggregates in local stage. -/// -/// Consider user query: `select sum(a), avg(a) from t` -/// at local stage we need to compute `sum(a)` only once. -/// -/// This struct contains info needed to compute hash and compare aggregates -/// used at local stage. -struct AggregateSignature<'plan, 'args> { - pub kind: AggregateKind, - /// ids of expressions used as arguments to aggregate - pub arguments: &'args Vec<NodeId>, - pub plan: &'plan Plan, - /// reference to local alias of this local aggregate - pub local_alias: Option<Rc<String>>, -} - -impl AggregateSignature<'_, '_> { - pub fn get_alias(&self) -> Result<Rc<String>, SbroadError> { - let r = self - .local_alias - .as_ref() - .ok_or_else(|| { - SbroadError::Invalid( - Entity::AggregateSignature, - Some("missing local alias".into()), - ) - })? 
- .clone(); - Ok(r) - } -} - -impl Hash for AggregateSignature<'_, '_> { - fn hash<H: Hasher>(&self, state: &mut H) { - self.kind.hash(state); - let mut comp = Comparator::new(self.plan); - comp.set_hasher(state); - for arg in self.arguments { - comp.hash_for_expr(*arg, EXPR_HASH_DEPTH); - } - } -} - -impl PartialEq<Self> for AggregateSignature<'_, '_> { - fn eq(&self, other: &Self) -> bool { - let comparator = Comparator::new(self.plan); - self.kind == other.kind - && self - .arguments - .iter() - .zip(other.arguments.iter()) - .all(|(l, r)| comparator.are_subtrees_equal(*l, *r).unwrap_or(false)) - } -} - -impl Eq for AggregateSignature<'_, '_> {} - +/// Id of grouping expression united with reference to plan +/// for the ease of expressions comparison (see +/// implementation of `Hash` and `PartialEq` traits). #[derive(Debug, Clone)] struct GroupingExpression<'plan> { pub id: NodeId, @@ -158,64 +85,6 @@ impl PartialEq for GroupingExpression<'_> { impl Eq for GroupingExpression<'_> {} -impl<'plan> AggrCollector<'plan> { - pub fn with_capacity(plan: &'plan Plan, capacity: usize) -> AggrCollector<'plan> { - AggrCollector { - infos: Vec::with_capacity(capacity), - parent_rel: None, - plan, - } - } - - pub fn take_aggregates(&mut self) -> Vec<AggrInfo> { - std::mem::take(&mut self.infos) - } - - /// Collect aggregates in internal field by traversing expression tree `top` - /// - /// # Arguments - /// * `top` - id of expression root in which to look for aggregates - /// * `parent_rel` - id of parent relational node, where `top` is located. 
It is used to - /// create `AggrInfo` - /// - /// # Errors - /// - invalid expression tree pointed by `top` - pub fn collect_aggregates( - &mut self, - top: NodeId, - parent_rel: NodeId, - ) -> Result<(), SbroadError> { - self.parent_rel = Some(parent_rel); - self.find(top, None)?; - self.parent_rel = None; - Ok(()) - } - - fn find(&mut self, current: NodeId, parent: Option<NodeId>) -> Result<(), SbroadError> { - let expr = self.plan.get_expression_node(current)?; - if let Expression::StableFunction(StableFunction { name, feature, .. }) = expr { - let is_distinct = matches!(feature, Some(FunctionFeature::Distinct)); - if let Some(aggr) = SimpleAggregate::new(name, current) { - let Some(parent_rel) = self.parent_rel else { - return Err(SbroadError::Invalid(Entity::AggregateCollector, None)); - }; - let info = AggrInfo { - parent_rel, - parent_expr: parent, - aggr, - is_distinct, - }; - self.infos.push(info); - return Ok(()); - }; - } - for child in self.plan.nodes.expr_iter(current, false) { - self.find(child, Some(current))?; - } - Ok(()) - } -} - /// Maps id of `GroupBy` expression used in `GroupBy` (from local stage) /// to list of locations where this expression is used in other relational /// operators like `Having`, `Projection`. @@ -229,11 +98,12 @@ impl<'plan> AggrCollector<'plan> { /// In case there is a reference (or expression containing it) in the final relational operator /// that doesn't correspond to any GroupBy expression, an error should have been thrown on the /// stage of `collect_grouping_expressions`. -type GroupbyExpressionsMap = HashMap<NodeId, Vec<ExpressionLocationIds>>; +type GroupbyExpressionsMap = AHashMap<NodeId, Vec<ExpressionLocationId>>; + /// Maps id of `GroupBy` expression used in `GroupBy` (from local stage) /// to corresponding local alias used in local Projection. 
Note: /// this map does not contain mappings between grouping expressions from -/// distinct aggregates (it is stored in corresponding `AggrInfo` for that +/// distinct aggregates (it is stored in corresponding `Aggregate` for that /// aggregate) /// /// For example: @@ -241,27 +111,30 @@ type GroupbyExpressionsMap = HashMap<NodeId, Vec<ExpressionLocationIds>>; /// map query: `select a as l1, b group by a, b` /// Then this map will map id of `a` to `l1` type LocalAliasesMap = HashMap<NodeId, Rc<String>>; -type LocalAggrInfo = (AggregateKind, Vec<NodeId>, Rc<String>); /// Helper struct to map expressions used in `GroupBy` to /// expressions used in some other node (`Projection`, `Having`, `OrderBy`) struct ExpressionMapper<'plan> { /// List of expressions ids of `GroupBy` gr_exprs: &'plan Vec<NodeId>, - map: GroupbyExpressionsMap, + map: &'plan mut GroupbyExpressionsMap, plan: &'plan Plan, /// Id of relational node (`Projection`, `Having`, `OrderBy`) - node_id: Option<NodeId>, + rel_id: NodeId, } impl<'plan> ExpressionMapper<'plan> { - fn new(gr_expressions: &'plan Vec<NodeId>, plan: &'plan Plan) -> ExpressionMapper<'plan> { - let map: GroupbyExpressionsMap = HashMap::new(); + fn new( + gr_exprs: &'plan Vec<NodeId>, + plan: &'plan Plan, + rel_id: NodeId, + map: &'plan mut GroupbyExpressionsMap, + ) -> ExpressionMapper<'plan> { ExpressionMapper { - gr_exprs: gr_expressions, + gr_exprs, map, plan, - node_id: None, + rel_id, } } @@ -269,37 +142,21 @@ impl<'plan> ExpressionMapper<'plan> { /// to find subexpressions that match expressions located in `GroupBy`, /// when match is found it is stored in map passed to [`ExpressionMapper`]'s /// constructor. 
- /// - /// # Arguments - /// * `expr_root` - expression id from which matching will start - /// * `node_id` - id of relational node (`Having`, `Projection`, `OrderBy`), - /// where expression pointed by `expr_root` is located - /// - /// # Errors - /// - invalid references in any expression (`GroupBy`'s or node's one) - /// - invalid query: node expression contains references that are not - /// found in `GroupBy` expression. The reason is that user specified expression in - /// node that does not match any expression in `GroupBy` - fn find_matches(&mut self, expr_root: NodeId, node_id: NodeId) -> Result<(), SbroadError> { - self.node_id = Some(node_id); + fn find_matches(&mut self, expr_root: NodeId) -> Result<(), SbroadError> { self.find(expr_root, None)?; - self.node_id = None; Ok(()) } /// Helper function for `find_matches` which compares current node to `GroupBy` expressions /// and if no match is found recursively calls itself. - fn find(&mut self, current: NodeId, parent: Option<NodeId>) -> Result<(), SbroadError> { - let Some(node_id) = self.node_id else { - return Err(SbroadError::Invalid(Entity::ExpressionMapper, None)); - }; + fn find(&mut self, current: NodeId, parent_expr: Option<NodeId>) -> Result<(), SbroadError> { let is_ref = matches!( self.plan.get_expression_node(current), Ok(Expression::Reference(_)) ); let is_sq_ref = is_ref && self.plan.is_additional_child_of_rel( - node_id, + self.rel_id, self.plan.get_relational_from_reference_node(current)?, )?; // Because subqueries are replaced with References, we must not @@ -318,7 +175,7 @@ impl<'plan> ExpressionMapper<'plan> { }) .copied() { - let location = ExpressionLocationIds::new(current, parent, node_id); + let location = ExpressionLocationId::new(current, parent_expr, self.rel_id); if let Some(v) = self.map.get_mut(&gr_expr) { v.push(location); } else { @@ -349,33 +206,65 @@ impl<'plan> ExpressionMapper<'plan> { } Ok(()) } +} + +fn grouping_expr_local_alias(index: usize) -> Rc<String> { + 
Rc::new(format!("gr_expr_{index}")) +} + +/// Capacity for the vecs/maps of grouping expressions we expect +/// to extract from nodes like Projection, GroupBy and Having. +const GR_EXPR_CAPACITY: usize = 5; + +/// Info helpful to generate final GroupBy node (on Reduce stage). +struct GroupByReduceInfo { + local_aliases_map: LocalAliasesMap, + /// Positions of grouping expressions added to the output of local + /// Projection. Used for generating MotionKey for segmented motion. + /// That's the reason we don't count grouping expressions came from + /// distinct aggregates here as they don't influence distribution. + grouping_positions: Vec<usize>, +} - pub fn get_matches(&mut self) -> GroupbyExpressionsMap { - std::mem::take(&mut self.map) +impl GroupByReduceInfo { + fn new() -> Self { + Self { + local_aliases_map: HashMap::with_capacity(GR_EXPR_CAPACITY), + grouping_positions: Vec::with_capacity(GR_EXPR_CAPACITY), + } } } -impl Plan { - #[allow(unreachable_code)] - fn generate_local_alias(id: NodeId) -> String { - #[cfg(feature = "mock")] - { - return format!("column_{id}"); +/// Info about both local and final GroupBy nodes. Such info is not +/// generated in case query doesn't require GroupBy nodes. E.g. `select sum(a) from t` +/// will require only two additional nodes: local Projection and a Motion node (see +/// logic under `add_two_stage_aggregation`). +struct GroupByInfo { + id: NodeId, + grouping_exprs: Vec<NodeId>, + gr_exprs_map: GroupbyExpressionsMap, + /// Map of { grouping_expr under local GroupBy -> its alias }. 
+ grouping_expr_to_alias_map: OrderedMap<NodeId, Rc<String>, RepeatableState>, + reduce_info: GroupByReduceInfo, +} + +impl GroupByInfo { + fn new(id: NodeId) -> Self { + Self { + id, + grouping_exprs: Vec::with_capacity(GR_EXPR_CAPACITY), + gr_exprs_map: AHashMap::with_capacity(GR_EXPR_CAPACITY), + grouping_expr_to_alias_map: OrderedMap::with_hasher(RepeatableState), + reduce_info: GroupByReduceInfo::new(), } - format!("{}_{id}", uuid::Uuid::new_v4().as_simple()) } +} +impl Plan { /// Used to create a `GroupBy` IR node from AST. /// The added `GroupBy` node is local - meaning /// that it is part of local stage in 2-stage /// aggregation. For more info, see `add_two_stage_aggregation`. - /// - /// # Arguments - /// * `children` - plan's ids of `group by` children from AST - /// - /// # Errors - /// - invalid children count - /// - failed to create output for `GroupBy` pub fn add_groupby_from_ast(&mut self, children: &[NodeId]) -> Result<NodeId, SbroadError> { let Some((first_child, other)) = children.split_first() else { return Err(SbroadError::UnexpectedNumberOfValues( @@ -383,98 +272,34 @@ impl Plan { )); }; - let groupby_id = self.add_groupby(*first_child, other, false, None)?; + let groupby_id = self.add_groupby(*first_child, other, None)?; Ok(groupby_id) } - /// Helper function to add `group by` to IR - /// - /// # Errors - /// - `child_id` - invalid `Relational` node - /// - `grouping_exprs` - contains non-expr id + /// Helper function to add `group by` to IR. 
pub fn add_groupby( &mut self, child_id: NodeId, grouping_exprs: &[NodeId], - is_final: bool, - expr_parent: Option<NodeId>, + prev_refs_parent_id: Option<NodeId>, ) -> Result<NodeId, SbroadError> { let final_output = self.add_row_for_output(child_id, &[], true, None)?; let groupby = GroupBy { children: [child_id].to_vec(), gr_exprs: grouping_exprs.to_vec(), output: final_output, - is_final, }; let groupby_id = self.add_relational(groupby.into())?; self.replace_parent_in_subtree(final_output, None, Some(groupby_id))?; for expr in grouping_exprs { - self.replace_parent_in_subtree(*expr, expr_parent, Some(groupby_id))?; + self.replace_parent_in_subtree(*expr, prev_refs_parent_id, Some(groupby_id))?; } Ok(groupby_id) } - /// Collect information about aggregates - /// - /// Aggregates can appear in `Projection`, `Having`, `OrderBy` - /// - /// # Arguments - /// [`finals`] - ids of nodes in final (reduce stage) before adding two stage aggregation. - /// It may contain ids of `Projection`, `Having` or `NamedWindows`. - /// Note: final `GroupBy` is not present because it will be added later in 2-stage pipeline. - fn collect_aggregates(&self, finals: &Vec<NodeId>) -> Result<Vec<AggrInfo>, SbroadError> { - let mut collector = AggrCollector::with_capacity(self, AGGR_CAPACITY); - for node_id in finals { - let node = self.get_relation_node(*node_id)?; - match node { - Relational::Projection(Projection { output, .. }) => { - for col in self.get_row_list(*output)? { - collector.collect_aggregates(*col, *node_id)?; - } - } - Relational::NamedWindows(_) => { - unreachable!("NamedWindows node should not be present in finals"); - } - Relational::Having(Having { filter, .. 
}) => { - collector.collect_aggregates(*filter, *node_id)?; - } - _ => { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!( - "unexpected relational node ({node_id:?}): {node:?}" - )), - )) - } - } - } - - let aggr_infos = collector.take_aggregates(); - self.validate_aggregates(&aggr_infos)?; - - Ok(aggr_infos) - } - - /// Validates expressions used in aggregates - /// - /// Currently we only check that there is no aggregates inside aggregates - fn validate_aggregates(&self, aggr_infos: &Vec<AggrInfo>) -> Result<(), SbroadError> { - for info in aggr_infos { - let top = info.aggr.fun_id; - if self.contains_aggregates(top, false)? { - return Err(SbroadError::Invalid( - Entity::Query, - Some("aggregate function inside aggregate function is not allowed.".into()), - )); - } - } - - Ok(()) - } - /// Get ids of nodes in Reduce stage (finals) and id of the top node in Map stage. /// /// Finals are nodes in Reduce stage without final `GroupBy`. @@ -494,7 +319,7 @@ impl Plan { &self, final_proj_id: NodeId, ) -> Result<(Vec<NodeId>, NodeId), SbroadError> { - let mut finals: Vec<NodeId> = vec![]; + let mut finals: Vec<NodeId> = Vec::with_capacity(3); let get_first_child = |rel_id: NodeId| -> Result<NodeId, SbroadError> { let c = *self .get_relational_children(rel_id)? @@ -526,208 +351,266 @@ impl Plan { )) } - /// Collects information about grouping expressions for future use. - /// In case there is a `Projection` with `distinct` modifier and - /// no `GroupBy` node, a `GroupBy` node with projection expressions - /// will be created. - /// This function also does all the validation of incorrect usage of - /// expressions used outside of aggregate functions. - /// - /// # Returns - /// - id of `GroupBy` node if is was created or `upper` otherwise - /// - list of ids of expressions used in `GroupBy`. Duplicate expressions are removed. 
- /// - mapping between `GroupBy` expressions and corresponding expressions in final nodes - /// (`Projection`, `Having`, `GroupBy`, `OrderBy`). - /// - /// # Arguments - /// * `upper` - id of the top node in reduce stage, if `GroupBy` is present in the query - /// the top node in Reduce stage will be `GroupBy`. - /// * `finals` - ids of nodes in final stage starting from `Projection` - /// - /// # Errors - /// - invalid references in `GroupBy` - /// - invalid query with `GroupBy`: some expression in some final node wasn't matched to - /// some `GroupBy` expression - /// - invalid query without `GroupBy` and with aggregates: there are column references outside - /// aggregate functions - /// - invalid query with `Having`: in case there's no `GroupBy`, `Having` may contain - /// only expressions with constants and aggregates. References outside of aggregate functions - /// are illegal. - #[allow(clippy::too_many_lines)] - fn collect_grouping_expressions( + /// In case we deal with a query containing "distinct" qualifier and + /// not containing aggregates or user defined GroupBy, we have to add + /// GroupBy node for fulfill "distinct" semantics. + fn add_group_by_for_distinct( &mut self, + proj_id: NodeId, upper: NodeId, - finals: &Vec<NodeId>, - has_aggregates: bool, - ) -> Result<(NodeId, Vec<NodeId>, GroupbyExpressionsMap), SbroadError> { - let mut grouping_expr = vec![]; - let mut gr_expr_map: GroupbyExpressionsMap = HashMap::new(); - let mut upper = upper; - - let mut has_groupby = matches!(self.get_relation_node(upper)?, Relational::GroupBy(_)); - - if !has_groupby && !has_aggregates { - if let Some(proj_id) = finals.first() { - if let Relational::Projection(Projection { - is_distinct, - output, - .. - }) = self.get_relation_node(*proj_id)? 
- { - if *is_distinct { - let proj_cols_len = self.get_row_list(*output)?.len(); - let mut grouping_exprs: Vec<NodeId> = Vec::with_capacity(proj_cols_len); - for i in 0..proj_cols_len { - let aliased_col = self.get_proj_col(*proj_id, i)?; - let proj_col_id = if let Expression::Alias(Alias { child, .. }) = - self.get_expression_node(aliased_col)? - { - *child - } else { - aliased_col - }; - // Copy expression from Projection to GroupBy. - let col = SubtreeCloner::clone_subtree(self, proj_col_id)?; - grouping_exprs.push(col); - } - upper = self.add_groupby(upper, &grouping_exprs, false, Some(*proj_id))?; + ) -> Result<Option<NodeId>, SbroadError> { + let Relational::Projection(Projection { + is_distinct, + output, + .. + }) = self.get_relation_node(proj_id)? + else { + unreachable!("Projection expected as a top final node") + }; - has_groupby = true; - } - } + let groupby_id = if *is_distinct { + let proj_cols_len = self.get_row_list(*output)?.len(); + let mut grouping_exprs: Vec<NodeId> = Vec::with_capacity(proj_cols_len); + for i in 0..proj_cols_len { + let aliased_col = self.get_proj_col(proj_id, i)?; + let proj_col_id = if let Expression::Alias(Alias { child, .. }) = + self.get_expression_node(aliased_col)? + { + *child + } else { + aliased_col + }; + // Copy expression from Projection to GroupBy. 
+ let col = SubtreeCloner::clone_subtree(self, proj_col_id)?; + grouping_exprs.push(col); } - } + let groupby_id = self.add_groupby(upper, &grouping_exprs, Some(proj_id))?; + Some(groupby_id) + } else { + None + }; + Ok(groupby_id) + } - if has_groupby { - let old_gr_exprs = self.get_grouping_exprs(upper)?; - // remove duplicate expressions - let mut unique_grouping_exprs: OrderedSet<GroupingExpression, _> = - OrderedSet::with_capacity_and_hasher(old_gr_exprs.len(), RepeatableState); - for gr_expr in old_gr_exprs { - unique_grouping_exprs.insert(GroupingExpression::new(*gr_expr, self)); - } - let grouping_exprs: Vec<NodeId> = unique_grouping_exprs.iter().map(|e| e.id).collect(); - grouping_expr.extend(grouping_exprs.iter()); - self.set_grouping_exprs(upper, grouping_exprs)?; - - let mut mapper = ExpressionMapper::new(&grouping_expr, self); - for node_id in finals { - match self.get_relation_node(*node_id)? { - Relational::Projection(Projection { output, .. }) => { - for col in self.get_row_list(*output)? { - mapper.find_matches(*col, *node_id)?; - } - } - Relational::NamedWindows(_) => { - unreachable!("NamedWindows node should not be present in finals"); - } - Relational::Having(Having { filter, .. }) => { - mapper.find_matches(*filter, *node_id)?; + /// Fill grouping expression map (see comments next to + /// `GroupbyExpressionsMap` definition). + #[allow(clippy::too_many_lines)] + fn fill_gr_exprs_map( + &mut self, + finals: &Vec<NodeId>, + groupby_info: &mut GroupByInfo, + ) -> Result<(), SbroadError> { + for rel_id in finals { + let final_node = self.get_relation_node(*rel_id)?; + match final_node { + Relational::Projection(Projection { output, .. }) => { + let mut mapper = ExpressionMapper::new( + &groupby_info.grouping_exprs, + self, + *rel_id, + &mut groupby_info.gr_exprs_map, + ); + for col in self.get_row_list(*output)? { + mapper.find_matches(*col)?; } - _ => {} + } + Relational::Having(Having { filter, .. 
}) => { + let mut mapper = ExpressionMapper::new( + &groupby_info.grouping_exprs, + self, + *rel_id, + &mut groupby_info.gr_exprs_map, + ); + mapper.find_matches(*filter)?; + } + _ => { + unreachable!("{final_node:?} node should not be present in finals"); } } - gr_expr_map = mapper.get_matches(); } - if has_aggregates && !has_groupby { - // check that all column references are inside aggregate functions - for id in finals { - let node = self.get_relation_node(*id)?; - match node { - Relational::Projection(Projection { output, .. }) => { - for col in self.get_row_list(*output)? { - let filter = |node_id: NodeId| -> bool { - matches!( - self.get_node(node_id), - Ok(Node::Expression(Expression::Reference(_))) - ) - }; - let mut dfs = PostOrderWithFilter::with_capacity( - |x| self.nodes.aggregate_iter(x, false), - EXPR_CAPACITY, - Box::new(filter), - ); - dfs.populate_nodes(*col); - let nodes = dfs.take_nodes(); - for level_node in nodes { - let id = level_node.1; - let n = self.get_expression_node(id)?; - if let Expression::Reference(_) = n { - let alias = match self.get_alias_from_reference_node(&n) { - Ok(v) => v.to_smolstr(), - Err(e) => e.to_smolstr(), - }; - return Err(SbroadError::Invalid(Entity::Query, - Some(format_smolstr!("found column reference ({}) outside aggregate function", to_user(alias))))); - } - } - } - } - Relational::Having(Having { filter, .. }) => { - let mut bfs = BreadthFirst::with_capacity( + Ok(()) + } + + /// In case query doesn't contain user defined GroupBy, check that all + /// column references under `finals` are inside aggregate functions. + fn check_refs_out_of_aggregates(&self, finals: &Vec<NodeId>) -> Result<(), SbroadError> { + for id in finals { + let node = self.get_relation_node(*id)?; + match node { + Relational::Projection(Projection { output, .. }) => { + for col in self.get_row_list(*output)? 
{ + let filter = |node_id: NodeId| -> bool { + matches!( + self.get_node(node_id), + Ok(Node::Expression(Expression::Reference(_))) + ) + }; + let mut dfs = PostOrderWithFilter::with_capacity( |x| self.nodes.aggregate_iter(x, false), EXPR_CAPACITY, - EXPR_CAPACITY, + Box::new(filter), ); - bfs.populate_nodes(*filter); - let nodes = bfs.take_nodes(); + dfs.populate_nodes(*col); + let nodes = dfs.take_nodes(); for level_node in nodes { let id = level_node.1; - if let Expression::Reference(_) = self.get_expression_node(id)? { + let n = self.get_expression_node(id)?; + if let Expression::Reference(_) = n { + let alias = match self.get_alias_from_reference_node(&n) { + Ok(v) => v.to_smolstr(), + Err(e) => e.to_smolstr(), + }; return Err(SbroadError::Invalid( Entity::Query, - Some("HAVING argument must appear in the GROUP BY clause or be used in an aggregate function".into()) + Some(format_smolstr!( + "found column reference ({}) outside aggregate function", + to_user(alias) + )), )); } } } - _ => {} } + Relational::Having(Having { filter, .. }) => { + let mut dfs = PostOrder::with_capacity( + |x| self.nodes.aggregate_iter(x, false), + EXPR_CAPACITY, + ); + dfs.populate_nodes(*filter); + let nodes = dfs.take_nodes(); + for level_node in nodes { + let id = level_node.1; + if let Expression::Reference(_) = self.get_expression_node(id)? { + return Err(SbroadError::Invalid( + Entity::Query, + Some("HAVING argument must appear in the GROUP BY clause or be used in an aggregate function".into()) + )); + } + } + } + _ => {} } } + Ok(()) + } - Ok((upper, grouping_expr, gr_expr_map)) + /// Check for GroupBy on bucket_id column. + /// In that case GroupBy can be done locally. + fn check_bucket_id_under_group_by( + &self, + grouping_exprs: &Vec<NodeId>, + ) -> Result<bool, SbroadError> { + for expr_id in grouping_exprs { + let Expression::Reference(Reference { position, .. }) = + self.get_expression_node(*expr_id)? 
+ else { + continue; + }; + let child_id = self.get_relational_from_reference_node(*expr_id)?; + let mut context = self.context_mut(); + if let Some(shard_positions) = context.get_shard_columns_positions(child_id, self)? { + if shard_positions[0] == Some(*position) || shard_positions[1] == Some(*position) { + return Ok(true); + } + } + } + Ok(false) } - /// Add expressions used as arguments to distinct aggregates to `GroupBy` in reduce stage - /// - /// E.g: For query below, this func should add b*b to reduce `GroupBy` - /// `select a, sum(distinct b*b), count(c) from t group by a` - /// Map: `select a as l1, b*b as l2, count(c) as l3 from t group by a, b` - /// Reduce: `select l1, sum(distinct l2), sum(l3) from tmp_space group by l1` - fn add_distinct_aggregates_to_local_groupby( + /// In case we have distinct aggregates like `count(distinct a)` they result + /// in adding its argument expressions (expression a for the case above) under + /// local GroupBy node. + fn collect_grouping_exprs_from_distinct_aggrs<'aggr>( + &self, + aggrs: &'aggr mut [Aggregate], + ) -> Result<Vec<(NodeId, &'aggr mut Aggregate)>, SbroadError> { + let mut res = Vec::with_capacity(aggrs.len()); + for aggr in aggrs.iter_mut().filter(|x| x.is_distinct) { + let arg: NodeId = *self + .nodes + .expr_iter(aggr.fun_id, false) + .collect::<Vec<NodeId>>() + .first() + .expect("Number of args for aggregate should have been already checked"); + res.push((arg, aggr)); + } + Ok(res) + } + + /// Adds grouping expressions to columns of local projection. + fn add_grouping_exprs( &mut self, - upper: NodeId, - additional_grouping_exprs: Vec<NodeId>, - ) -> Result<NodeId, SbroadError> { - let mut local_proj_child_id = upper; - if !additional_grouping_exprs.is_empty() { - if let MutRelational::GroupBy(GroupBy { gr_exprs, .. }) = - self.get_mut_relation_node(local_proj_child_id)? 
- { - gr_exprs.extend(additional_grouping_exprs); - } else { - local_proj_child_id = - self.add_groupby(upper, &additional_grouping_exprs, true, None)?; - self.set_distribution(self.get_relational_output(local_proj_child_id)?)?; - } + groupby_info: &mut GroupByInfo, + output_cols: &mut Vec<NodeId>, + ) -> Result<(), SbroadError> { + // Map of { grouping_expr_alias -> proj_output_position }. + let mut alias_to_pos: HashMap<Rc<String>, usize> = HashMap::with_capacity(EXPR_CAPACITY); + // Add grouping expressions to local projection. + for (pos, (gr_expr, local_alias)) in + groupby_info.grouping_expr_to_alias_map.iter().enumerate() + { + let new_gr_expr = SubtreeCloner::clone_subtree(self, *gr_expr)?; + let new_alias = self.nodes.add_alias(&local_alias, new_gr_expr)?; + output_cols.push(new_alias); + alias_to_pos.insert(local_alias.clone(), pos); } - Ok(local_proj_child_id) + // Note: we need to iterate only over grouping expressions that were present + // in original user query here. We must not use the grouping expressions + // that come from distinct aggregates. This is because they are handled separately: + // local aliases map is needed only for GroupBy expressions in the original query and + // grouping positions are used to create a Motion later, which should take into account + // only positions from GroupBy expressions in the original user query. 
+ for expr_id in &groupby_info.grouping_exprs { + let local_alias = groupby_info + .grouping_expr_to_alias_map + .get(expr_id) + .expect("grouping expressions map should contain given expr_id") + .clone(); + groupby_info + .reduce_info + .local_aliases_map + .insert(*expr_id, local_alias.clone()); + let pos = alias_to_pos + .get(&local_alias) + .expect("alias map should contain given local alias"); + groupby_info.reduce_info.grouping_positions.push(*pos); + } + + Ok(()) + } + + /// Creates columns for local projection + /// + /// local projection contains groupby columns + local aggregates, + /// this function removes duplicated among them and creates the list for output + /// `Row` for local projection. + /// + /// In case we have distinct aggregates and no groupby in original query, + /// local `GroupBy` node will created. + fn create_columns_for_local_proj( + &mut self, + aggrs: &mut [Aggregate], + groupby_info: &mut Option<GroupByInfo>, + ) -> Result<Vec<NodeId>, SbroadError> { + let mut output_cols: Vec<NodeId> = vec![]; + + if let Some(groupby_info) = groupby_info.as_mut() { + self.add_grouping_exprs(groupby_info, &mut output_cols)?; + }; + self.add_local_aggregates(aggrs, &mut output_cols)?; + + Ok(output_cols) } /// Create Projection node for Map(local) stage of 2-stage aggregation /// /// # Arguments /// - /// * `child_id` - id of child for Projection node to be created. - /// * `aggr_infos` - vector of metadata for each aggregate function that was found in final + /// * `upper_id` - id of child for Projection node to be created. + /// * `aggrs` - vector of metadata for each aggregate function that was found in final /// projection. Each info specifies what kind of aggregate it is (sum, avg, etc) and location /// in final projection. - /// * `grouping_exprs` - ids of grouping expressions from local `GroupBy`, empty if there is - /// no `GroupBy` in original query. - /// * `finals` - ids of nodes from final stage, starting from `Projection`. 
- /// Final stage may contain `Projection`, `Limit`, `OrderBy`, `Having` nodes. /// /// Local Projection is created by creating columns for grouping expressions and columns /// for local aggregates. If there is no `GroupBy` in the original query then `child_id` refers @@ -768,26 +651,20 @@ impl Plan { /// ``` /// The same logic must be applied to any node in final stage of 2-stage aggregation: /// `Having`, `GroupBy`, `OrderBy`. See [`add_two_stage_aggregation`] for more details. - /// - /// # Returns - /// - id of local `Projection` that was created. - /// - vector of positions by which `GroupBy` is done. Positions are relative to local `Projection` - /// output. - /// - map between `GroupBy` expression and corresponding local alias. fn add_local_projection( &mut self, - child_id: NodeId, - aggr_infos: &mut Vec<AggrInfo>, - grouping_exprs: &[NodeId], - ) -> Result<(NodeId, Vec<usize>, LocalAliasesMap), SbroadError> { - let (child_id, proj_output_cols, groupby_local_aliases, grouping_positions) = - self.create_columns_for_local_proj(aggr_infos, child_id, grouping_exprs)?; - let proj_output = self.nodes.add_row(proj_output_cols, None); + upper_id: NodeId, + aggrs: &mut Vec<Aggregate>, + groupby_info: &mut Option<GroupByInfo>, + ) -> Result<NodeId, SbroadError> { + let proj_output_cols = self.create_columns_for_local_proj(aggrs, groupby_info)?; + let proj_output: NodeId = self.nodes.add_row(proj_output_cols, None); let ref_rel_nodes = self.get_relational_nodes_from_row(proj_output)?; - let mut children = vec![child_id]; + let mut children = vec![upper_id]; + // Handle subqueries. for ref_rel_node_id in ref_rel_nodes { let rel_node = self.get_relation_node(ref_rel_node_id)?; if matches!(rel_node, Relational::ScanSubQuery { .. 
}) @@ -815,312 +692,38 @@ impl Plan { }; let proj_id = self.add_relational(proj.into())?; - for info in aggr_infos { - // We take expressions inside aggregate functions from Final projection, - // so we need to update parent - self.replace_parent_in_subtree(info.aggr.fun_id, Some(info.parent_rel), Some(proj_id))?; + // We take expressions inside aggregate functions from Final projection, + // so we need to update parent. + for aggr in aggrs { + self.replace_parent_in_subtree(aggr.fun_id, Some(aggr.parent_rel), Some(proj_id))?; } - self.set_distribution(proj_output)?; - - Ok((proj_id, grouping_positions, groupby_local_aliases)) - } + if let Some(groupby_info) = groupby_info.as_mut() { + let local_projection_output = self.get_row_list(proj_output)?.clone(); - fn create_local_aggregate( - &mut self, - kind: AggregateKind, - arguments: &[NodeId], - local_alias: &str, - ) -> Result<NodeId, SbroadError> { - let fun: Function = Function { - name: kind.to_smolstr(), - behavior: Behavior::Stable, - func_type: kind.to_type(self, arguments)?, - is_system: true, - }; - // We can reuse aggregate expression between local aggregates, because - // all local aggregates are located inside the same motion subtree and we - // assume that each local aggregate does not need to modify its expression - let local_fun_id = self.add_stable_function(&fun, arguments.to_vec(), None)?; - let alias_id = self.nodes.add_alias(local_alias, local_fun_id)?; - Ok(alias_id) - } + for (new_gr_expr_pos, _) in groupby_info.grouping_expr_to_alias_map.iter().enumerate() { + let new_gr_expr_id = *local_projection_output + .get(new_gr_expr_pos) + .expect("Grouping expression should be found under local Projection output"); - /// Creates columns for local projection - /// - /// local projection contains groupby columns + local aggregates, - /// this function removes duplicated among them and creates the list for output - /// `Row` for local projection. 
- /// - /// In case we have distinct aggregates and no groupby in original query, - /// local `GroupBy` node will created. - /// - /// # Returns - /// - id of local Projection child. - /// - created list of columns - /// - mapping between `GroupBy` expressions and local aliases - /// - grouping positions: positions of columns by which `GroupBy` is done - fn create_columns_for_local_proj( - &mut self, - aggr_infos: &mut [AggrInfo], - upper_id: NodeId, - grouping_exprs: &[NodeId], - ) -> Result<(NodeId, Vec<NodeId>, LocalAliasesMap, Vec<usize>), SbroadError> { - let mut output_cols: Vec<NodeId> = vec![]; - let (local_aliases, child_id, grouping_positions) = - self.add_grouping_exprs(aggr_infos, upper_id, grouping_exprs, &mut output_cols)?; - self.add_local_aggregates(aggr_infos, &mut output_cols)?; - - Ok((child_id, output_cols, local_aliases, grouping_positions)) - } - - /// Adds grouping expressions to columns of local projection - /// - /// # Arguments - /// * `aggr_infos` - list of metadata info for each aggregate - /// * `upper_id` - first node in local stage, if `GroupBy` was - /// present in the original user query, then it is the id of that - /// `GroupBy` - /// * `grouping_exprs` - ids of grouping expressions from local - /// `GroupBy`. It is assumed that there are no duplicate expressions - /// among them. - /// * `output_cols` - list of projection columns, where to push grouping - /// expressions. - /// - /// # Returns - /// - map between grouping expression id and corresponding local alias - /// - id of a Projection child, in case there are distinct aggregates and - /// no local `GroupBy` node, this node will be created - /// - list of positions in projection columns by which `GroupBy` is done. These - /// positions are later used to create Motion node and they include only positions - /// from original `GroupBy`. Grouping expressions from distinct aggregates are not - /// included in this list as they shouldn't be used for Motion node. 
- fn add_grouping_exprs( - &mut self, - aggr_infos: &mut [AggrInfo], - upper_id: NodeId, - grouping_exprs: &[NodeId], - output_cols: &mut Vec<NodeId>, - ) -> Result<(LocalAliasesMap, NodeId, Vec<usize>), SbroadError> { - let mut unique_grouping_exprs_for_local_stage_full: OrderedMap< - GroupingExpression, - Rc<String>, - RepeatableState, - > = OrderedMap::with_hasher(RepeatableState); - for gr_expr in grouping_exprs.iter() { - unique_grouping_exprs_for_local_stage_full.insert( - GroupingExpression::new(*gr_expr, self), - Rc::new(Self::generate_local_alias(*gr_expr)), - ); - } - - // add grouping expressions found from distinct aggregates to local groupby - let mut grouping_exprs_from_aggregates: Vec<NodeId> = vec![]; - for info in aggr_infos.iter_mut().filter(|x| x.is_distinct) { - let argument = { - let args = self - .nodes - .expr_iter(info.aggr.fun_id, false) - .collect::<Vec<NodeId>>(); - if args.len() > 1 && !matches!(info.aggr.kind, AggregateKind::GRCONCAT) { - return Err(SbroadError::UnexpectedNumberOfValues(format_smolstr!( - "aggregate ({info:?}) have more than one argument" - ))); - } - *args.first().ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues(format_smolstr!( - "Aggregate function has no children: {info:?}" - )) - })? 
- }; - let expr = GroupingExpression::new(argument, self); - if let Some(local_alias) = unique_grouping_exprs_for_local_stage_full.get(&expr) { - info.aggr - .lagg_alias - .insert(info.aggr.kind, local_alias.clone()); - } else { - grouping_exprs_from_aggregates.push(argument); - let local_alias = Rc::new(Self::generate_local_alias(argument)); - unique_grouping_exprs_for_local_stage_full.insert(expr, local_alias.clone()); - info.aggr.lagg_alias.insert(info.aggr.kind, local_alias); - } - } - - // Because of borrow checker we need to remove references to Plan from map - let mut unique_grouping_exprs_for_local_stage: OrderedMap< - NodeId, - Rc<String>, - RepeatableState, - > = OrderedMap::with_capacity_and_hasher( - unique_grouping_exprs_for_local_stage_full.len(), - RepeatableState, - ); - for (gr_expr, name) in unique_grouping_exprs_for_local_stage_full.iter() { - unique_grouping_exprs_for_local_stage.insert(gr_expr.id, name.clone()) - } - - let mut alias_to_pos: HashMap<Rc<String>, usize> = HashMap::new(); - // add grouping expressions to local projection - for (pos, (gr_expr, local_alias)) in - unique_grouping_exprs_for_local_stage.iter().enumerate() - { - let new_alias = self.nodes.add_alias(local_alias, *gr_expr)?; - output_cols.push(new_alias); - alias_to_pos.insert(local_alias.clone(), pos); - } - - let mut local_aliases: LocalAliasesMap = - HashMap::with_capacity(unique_grouping_exprs_for_local_stage.len()); - let mut grouping_positions: Vec<usize> = Vec::with_capacity(grouping_exprs.len()); - - // Note: we need to iterate only over grouping expressions that were present - // in original user query here. We must not use the grouping expressions - // that come from distinct aggregates. 
This is because they are handled separately: - // local aliases map is needed only for GroupBy expressions in the original query and - // grouping positions are used to create a Motion later, which should take into account - // only positions from GroupBy expressions in the original user query. - for expr_id in grouping_exprs.iter() { - if let Some(local_alias) = unique_grouping_exprs_for_local_stage.remove(expr_id) { - local_aliases.insert(*expr_id, local_alias.clone()); - if let Some(pos) = alias_to_pos.get(&local_alias) { - grouping_positions.push(*pos); - } else { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!("missing position for local GroupBy column with local alias: {local_alias}")) - )); - } - } else { - return Err(SbroadError::Invalid( - Entity::Node, - Some(format_smolstr!("invalid map with unique grouping expressions. Could not find grouping expression with id: {expr_id:?}")))); + self.set_parent_in_subtree(new_gr_expr_id, proj_id)?; } - } - - let child_id = self - .add_distinct_aggregates_to_local_groupby(upper_id, grouping_exprs_from_aggregates)?; - Ok((local_aliases, child_id, grouping_positions)) - } - - /// Adds aggregates columns in `output_cols` for local `Projection` - /// - /// This function collects local aggregates from each `AggrInfo`, - /// then it removes duplicates from them using `AggregateSignature`. - /// Next, it creates for each unique aggregate local alias and column. - #[allow(clippy::mutable_key_type)] - fn add_local_aggregates( - &mut self, - aggr_infos: &mut [AggrInfo], - output_cols: &mut Vec<NodeId>, - ) -> Result<(), SbroadError> { - // Aggregate expressions can appear in `Projection`, `Having`, `OrderBy`, if the - // same expression appears in different places, we must not calculate it separately: - // `select sum(a) from t group by b having sum(a) > 10` - // Here `sum(a)` appears both in projection and having, so we need to calculate it only once. 
- let mut unique_local_aggregates: HashSet<AggregateSignature, RepeatableState> = - HashSet::with_hasher(RepeatableState); - for pos in 0..aggr_infos.len() { - let info = aggr_infos.get(pos).ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues(format_smolstr!( - "invalid idx of aggregate infos ({pos})" - )) - })?; - if info.is_distinct { - continue; - } - let arguments = { - if let Expression::StableFunction(StableFunction { children, .. }) = - self.get_expression_node(info.aggr.fun_id)? - { - children - } else { - return Err(SbroadError::Invalid( - Entity::Aggregate, - Some(format_smolstr!("invalid fun_id: {:?}", info.aggr.fun_id)), - )); - } - }; - for kind in info.aggr.kind.get_local_aggregates_kinds() { - let mut signature = AggregateSignature { - kind, - arguments, - plan: self, - local_alias: None, - }; - if let Some(sig) = unique_local_aggregates.get(&signature) { - if let Some(alias) = &sig.local_alias { - let info = aggr_infos.get_mut(pos).ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues(format_smolstr!( - "invalid idx of aggregate infos ({pos})" - )) - })?; - info.aggr.lagg_alias.insert(kind, alias.clone()); - } else { - return Err(SbroadError::Invalid( - Entity::AggregateSignature, - Some("no local alias".into()), - )); - } - } else { - let info = aggr_infos.get_mut(pos).ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues(format_smolstr!( - "invalid idx of aggregate infos ({pos})" - )) - })?; - let alias = Rc::new(generate_local_alias_for_aggr( - &kind, - &format_smolstr!("{}", info.aggr.fun_id), - )); - info.aggr.lagg_alias.insert(kind, alias.clone()); - signature.local_alias = Some(alias); - unique_local_aggregates.insert(signature); - } - } - } - - // add non-distinct aggregates to local projection - let local_aggregates: Result<Vec<LocalAggrInfo>, SbroadError> = unique_local_aggregates - .into_iter() - .map( - |x| -> Result<(AggregateKind, Vec<NodeId>, Rc<String>), SbroadError> { - match x.get_alias() { - Ok(s) => Ok((x.kind, 
x.arguments.clone(), s)), - Err(e) => Err(e), - } - }, - ) - .collect(); - for (kind, arguments, local_alias) in local_aggregates? { - let alias_id = self.create_local_aggregate(kind, &arguments, local_alias.as_str())?; - output_cols.push(alias_id); - } + }; + self.set_distribution(proj_output)?; - Ok(()) + Ok(proj_id) } - /// Add final `GroupBy` node in case `grouping_exprs` are not empty - /// - /// # Arguments - /// * `child_id` - id if relational node that will the child of `GroupBy` - /// * `grouping_exprs` - list of grouping expressions ids (which does not include - /// grouping expressions from distinct arguments) - /// * `local_aliases_map` - map between expression from `GroupBy` to alias used - /// at local stage - /// - /// # Returns - /// - if `GroupBy` node was created, return its id - /// - if `GroupBy` node was not created, return `child_id` + /// Add final `GroupBy` node in case `grouping_exprs` are not empty. fn add_final_groupby( &mut self, child_id: NodeId, - grouping_exprs: &Vec<NodeId>, - local_aliases_map: &LocalAliasesMap, + groupby_info: &GroupByInfo, ) -> Result<NodeId, SbroadError> { - if grouping_exprs.is_empty() { - // no GroupBy in the original query, nothing to do - return Ok(child_id); - } + let grouping_exprs = &groupby_info.grouping_exprs; + let local_aliases_map = &groupby_info.reduce_info.local_aliases_map; + let mut gr_exprs: Vec<NodeId> = Vec::with_capacity(grouping_exprs.len()); - let child_map = ColumnPositionMap::new(self, child_id)?; + let child_map: ColumnPositionMap = ColumnPositionMap::new(self, child_id)?; let mut nodes = Vec::with_capacity(grouping_exprs.len()); for expr_id in grouping_exprs { let Some(local_alias) = local_aliases_map.get(expr_id) else { @@ -1157,6 +760,8 @@ impl Plan { gr_exprs.push(new_col_id); } let output = self.add_row_for_output(child_id, &[], true, None)?; + + // Because GroupBy node lies in the Arena64. 
let final_id = self.nodes.next_id(ArenaType::Arena64); for col in &gr_exprs { self.replace_parent_in_subtree(*col, None, Some(final_id))?; @@ -1164,7 +769,6 @@ impl Plan { let final_groupby = GroupBy { gr_exprs, children: vec![child_id], - is_final: true, output, }; self.replace_parent_in_subtree(output, None, Some(final_id))?; @@ -1177,16 +781,18 @@ impl Plan { /// references to local aliases. /// /// For example: - /// original query: `select a + b from t group by a + b` + /// original query: `select a + b as user_alias from t group by a + b` /// map query: `select a + b as l1 from t group by a + b` `l1` is local alias /// reduce query: `select l1 as user_alias from tmp_space group by l1` - /// In above example this function will replace `a+b` expression in final `Projection` + /// In above example this function will replace `a + b` expression in final `Projection` #[allow(clippy::too_many_lines)] fn patch_grouping_expressions( &mut self, - local_aliases_map: &LocalAliasesMap, - map: GroupbyExpressionsMap, + groupby_info: &GroupByInfo, ) -> Result<(), SbroadError> { + let local_aliases_map = &groupby_info.reduce_info.local_aliases_map; + let gr_exprs_map = &groupby_info.gr_exprs_map; + type RelationalID = NodeId; type GroupByExpressionID = NodeId; type ExpressionID = NodeId; @@ -1199,14 +805,14 @@ impl Plan { type ParentExpressionMap = HashMap<RelationalID, Vec<(GroupByExpressionID, ExpressionID, ExpressionParent)>>; let map: ParentExpressionMap = { - let mut new_map: ParentExpressionMap = HashMap::with_capacity(map.len()); - for (groupby_expr_id, locations) in map { + let mut new_map: ParentExpressionMap = HashMap::with_capacity(gr_exprs_map.len()); + for (groupby_expr_id, locations) in gr_exprs_map { for location in locations { - let rec = (groupby_expr_id, location.expr, location.parent_expr); - if let Some(u) = new_map.get_mut(&location.rel) { + let rec = (*groupby_expr_id, location.expr_id, location.parent_expr_id); + if let Some(u) = 
new_map.get_mut(&location.rel_id) { u.push(rec); } else { - new_map.insert(location.rel, vec![rec]); + new_map.insert(location.rel_id, vec![rec]); } } } @@ -1224,7 +830,7 @@ impl Plan { })?; let alias_to_pos_map = ColumnPositionMap::new(self, child_id)?; let mut nodes = Vec::with_capacity(group.len()); - for (gr_expr_id, expr_id, parent) in group { + for (gr_expr_id, expr_id, parent_expr_id) in group { let Some(local_alias) = local_aliases_map.get(&gr_expr_id) else { return Err(SbroadError::Invalid( Entity::Plan, @@ -1252,35 +858,27 @@ impl Plan { col_type, asterisk_source: None, }; - nodes.push((parent, expr_id, gr_expr_id, new_ref)); + nodes.push((parent_expr_id, expr_id, new_ref)); } - for (parent, expr_id, gr_expr_id, node) in nodes { + for (parent_expr_id, expr_id, node) in nodes { let ref_id = self.nodes.push(node.into()); - if let Some(parent_expr_id) = parent { + if let Some(parent_expr_id) = parent_expr_id { self.replace_expression(parent_expr_id, expr_id, ref_id)?; } else { - match self.get_mut_relation_node(rel_id)? { - MutRelational::Projection(_) => { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!( - "{} {gr_expr_id:?} {} {expr_id:?} {}", - "invalid mapping between group by expression", - "and projection one: expression", - "has no parent", - )), - )) - } + // Grouping expression doesn't have parent grouping expression. + let rel_node = self.get_mut_relation_node(rel_id)?; + match rel_node { MutRelational::Having(Having { filter, .. }) => { - *filter = ref_id; + // E.g. `select a from t group by a having a`. + if *filter == expr_id { + *filter = ref_id; + } } _ => { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!( - "unexpected node in Reduce stage: {rel_id:?}" - )), - )) + // Currently Having is the only relational node in which grouping expression + // can not have a parent expression (under Projection all expressions are covered + // with output Row node). 
+ panic!("Unexpected final node met for expression replacement: {rel_node:?}") } } } @@ -1309,160 +907,100 @@ impl Plan { /// * `finals_child_id` - id of a relational node right after `finals` in the plan. In case /// original query had `GroupBy`, this will be final `GroupBy` id. /// * `local_aliases_map` - map between grouping expressions ids and corresponding local aliases. - /// * `aggr_infos` - list of metadata about aggregates - /// * `gr_expr_map` - map between grouping expressions in `GroupBy` and grouping expressions - /// used in `finals`. + /// * `aggrs` - list of metadata about aggregates fn patch_finals( &mut self, finals: &[NodeId], finals_child_id: NodeId, - local_aliases_map: &LocalAliasesMap, - aggr_infos: &Vec<AggrInfo>, - gr_expr_map: GroupbyExpressionsMap, + aggrs: &Vec<Aggregate>, + groupby_info: &Option<GroupByInfo>, ) -> Result<(), SbroadError> { - // After we added a Map stage, we need to update output - // of nodes in Reduce stage - if let Some(last) = finals.last() { - if let Some(first) = self.get_mut_relation_node(*last)?.mut_children().get_mut(0) { - *first = finals_child_id; - } - } + // Update relational child of the last final. + let last_final_id = finals.last().expect("last final node should exist"); + *self + .get_mut_relation_node(*last_final_id)? + .mut_children() + .get_mut(0) + .expect("last final node should have child") = finals_child_id; + + // After we added a Map stage, we need to + // update output of Having in Reduce stage. 
for node_id in finals.iter().rev() { let node = self.get_relation_node(*node_id)?; match node { - // Projection node is the top node in finals: its aliases - // must not be changed (because those are user aliases), so - // nothing to do here - Relational::Projection(_) => {} - Relational::NamedWindows(_) => { - unreachable!("NamedWindows node should not be in finals") + Relational::Projection(_) => { + // Projection node is the top node in finals: its aliases + // must not be changed (because those are user aliases), so + // nothing to do here. } Relational::Having(Having { children, .. }) => { - let child_id = *children.first().ok_or_else(|| { - SbroadError::Invalid( - Entity::Node, - Some(format_smolstr!("Having ({node_id:?}) has no children!")), - ) - })?; + let child_id = *children.first().expect("Having should have a child"); let output = self.add_row_for_output(child_id, &[], true, None)?; *self.get_mut_relation_node(*node_id)?.mut_output() = output; self.replace_parent_in_subtree(output, None, Some(*node_id))?; } - _ => { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!("Unexpected node in reduce stage: {node:?}")), - )) - } + _ => unreachable!("Unexpected node in reduce stage: {node:?}"), } } - self.patch_grouping_expressions(local_aliases_map, gr_expr_map)?; - let mut parent_to_infos: HashMap<NodeId, Vec<AggrInfo>> = + if let Some(groupby_info) = groupby_info { + self.patch_grouping_expressions(groupby_info)?; + } + + let mut parent_to_aggrs: HashMap<NodeId, Vec<Aggregate>> = HashMap::with_capacity(finals.len()); - for info in aggr_infos { - if let Some(v) = parent_to_infos.get_mut(&info.parent_rel) { - v.push(info.clone()); + for aggr in aggrs { + if let Some(v) = parent_to_aggrs.get_mut(&aggr.parent_rel) { + v.push(aggr.clone()); } else { - parent_to_infos.insert(info.parent_rel, vec![info.clone()]); + parent_to_aggrs.insert(aggr.parent_rel, vec![aggr.clone()]); } } - for (parent, infos) in parent_to_infos { - let child_id = { - 
let children = self.get_relational_children(parent)?; - *children.get(0).ok_or_else(|| { - SbroadError::Invalid( - Entity::Node, - Some(format_smolstr!( - "patch aggregates: rel node ({parent:?}) has no children!" - )), - ) - })? - }; - let alias_to_pos_map = ColumnPositionMap::new(self, child_id)?; - let mut position_kinds = Vec::with_capacity(infos.len()); - for info in &infos { - position_kinds.push( - info.aggr - .get_position_kinds(&alias_to_pos_map, info.is_distinct)?, - ); - } - for (info, pos_kinds) in infos.into_iter().zip(position_kinds) { - let fun_expr = self.get_expression_node(info.aggr.fun_id)?; - let fun_type = fun_expr.calculate_type(self)?; - let final_expr = info.aggr.create_final_aggregate_expr( - parent, - self, - fun_type, - pos_kinds, - info.is_distinct, - )?; - if let Some(parent_expr) = info.parent_expr { - self.replace_expression(parent_expr, info.aggr.fun_id, final_expr)?; - } else { - let node = self.get_mut_relation_node(parent)?; - return Err(SbroadError::Invalid( - Entity::Aggregate, - Some(format_smolstr!( - "aggregate info for {node:?} that hat no parent! Info: {info:?}" - )), - )); - } + for (parent, aggrs) in parent_to_aggrs { + let child_id = *self + .get_relational_children(parent)? + .get(0) + .expect("final relational node should have a child"); + + // AggrKind -> LocalAlias -> Pos in the output + let alias_to_pos_map: ColumnPositionMap = ColumnPositionMap::new(self, child_id)?; + for aggr in aggrs { + // Position in the output with aggregate kind. 
+ let pos_kinds = aggr.get_position_kinds(&alias_to_pos_map)?; + let final_expr = aggr.create_final_aggregate_expr(self, pos_kinds)?; + self.replace_expression(aggr.parent_expr, aggr.fun_id, final_expr)?; } } Ok(()) } - fn add_motion_to_2stage( + fn add_motion_to_two_stage( &mut self, - grouping_positions: &[usize], - motion_parent: NodeId, + groupby_info: &Option<GroupByInfo>, + finals_child_id: NodeId, finals: &[NodeId], ) -> Result<(), SbroadError> { - let proj_id = *finals.first().ok_or_else(|| { - SbroadError::Invalid(Entity::Plan, Some("no nodes in Reduce stage!".into())) - })?; - if let Relational::Projection(_) = self.get_relation_node(proj_id)? { + let final_proj_id = *finals.first().expect("finals should not be empty"); + if let Relational::Projection(_) = self.get_relation_node(final_proj_id)? { } else { - return Err(SbroadError::Invalid( - Entity::Plan, - Some("expected Projection as first node in reduce stage!".into()), - )); + unreachable!("Projection should be the first node in reduce stage") } - if grouping_positions.is_empty() { - // no GroupBy - let last_final_id = *finals.last().ok_or_else(|| { - SbroadError::Invalid(Entity::Plan, Some("Reduce stage has no nodes!".into())) - })?; - let mut strategy = Strategy::new(last_final_id); - strategy.add_child(motion_parent, MotionPolicy::Full, Program::default()); - self.create_motion_nodes(strategy)?; - self.set_dist(self.get_relational_output(proj_id)?, Distribution::Single)?; - } else { - // we have GroupBy, then finals_child_id is final GroupBy - let child_id = if let Relational::GroupBy(GroupBy { children, .. }) = - self.get_relation_node(motion_parent)? - { - *children.first().ok_or_else(|| { - SbroadError::Invalid( - Entity::Node, - Some(format_smolstr!( - "final GroupBy ({motion_parent:?}) has no children!" - )), - ) - })? 
- } else { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!( - "expected to have GroupBy under reduce nodes on id: {motion_parent:?}" - )), - )); - }; - let mut strategy = Strategy::new(motion_parent); + // In case we have local GroupBy, then `finals_child_id`` is local GroupBy. + let finals_child_node = self.get_relation_node(finals_child_id)?; + let has_local_group_by = matches!(finals_child_node, Relational::GroupBy(_)); + + if let Relational::GroupBy(GroupBy { children, .. }) = finals_child_node { + let final_group_by_child_id = *children.first().unwrap_or_else(|| { + unreachable!("final GroupBy ({finals_child_id:?}) should have children") + }); + + let groupby_info = groupby_info.as_ref().expect("GroupBy should exists"); + let grouping_positions: &Vec<usize> = &groupby_info.reduce_info.grouping_positions; + + let mut strategy = Strategy::new(finals_child_id); strategy.add_child( - child_id, + final_group_by_child_id, MotionPolicy::Segment(MotionKey { targets: grouping_positions .iter() @@ -1475,95 +1013,40 @@ impl Plan { // When we created final GroupBy we didn't set its distribution, because its // actual child (Motion) wasn't created yet. - self.set_distribution(self.get_relational_output(motion_parent)?)?; - } - Ok(()) - } + self.set_distribution(self.get_relational_output(finals_child_id)?)?; + } else { + // No local GroupBy. + let last_final_id = *finals.last().unwrap(); + let mut strategy = Strategy::new(last_final_id); + strategy.add_child(finals_child_id, MotionPolicy::Full, Program::default()); + self.create_motion_nodes(strategy)?; - /// Adds 2-stage aggregation and returns `true` if there are any aggregate - /// functions or `GroupBy` is present. Otherwise, returns `false` and - /// does nothing. 
- /// - /// # Errors - /// - failed to create local `GroupBy` node - /// - failed to create local `Projection` node - /// - failed to create `SQ` node - /// - failed to change final `GroupBy` child to `SQ` - /// - failed to update expressions in final `Projection` - pub fn add_two_stage_aggregation( - &mut self, - final_proj_id: NodeId, - ) -> Result<bool, SbroadError> { - let (finals, upper) = self.split_group_by(final_proj_id)?; - let mut aggr_infos = self.collect_aggregates(&finals)?; - let has_aggregates = !aggr_infos.is_empty(); - let (upper, grouping_exprs, gr_expr_map) = - self.collect_grouping_expressions(upper, &finals, has_aggregates)?; - if grouping_exprs.is_empty() && aggr_infos.is_empty() { - return Ok(false); + self.set_dist( + self.get_relational_output(final_proj_id)?, + Distribution::Single, + )?; } - // Check for group by on bucket_id column - // in that case groupby can be done locally. - if !grouping_exprs.is_empty() { - // let shard_col_info = self.track_shard_column_pos(final_proj_id)?; - for expr_id in &grouping_exprs { - let Expression::Reference(Reference { position, .. }) = - self.get_expression_node(*expr_id)? - else { - continue; - }; - let child_id = self.get_relational_from_reference_node(*expr_id)?; - let mut context = self.context_mut(); - if let Some(shard_positions) = - context.get_shard_columns_positions(child_id, self)? - { - if shard_positions[0] == Some(*position) - || shard_positions[1] == Some(*position) - { - return Ok(false); - } - } - } + // Set distribution to final outputs (except Projection). + for node_id in finals.iter().skip(1).rev() { + self.set_distribution(self.get_relational_output(*node_id)?)?; + } + if has_local_group_by { + // In case we've added final GroupBy we set distribution based on it. 
+ self.set_distribution(self.get_relational_output(final_proj_id)?)?; } - let (local_proj_id, grouping_positions, local_aliases_map) = - self.add_local_projection(upper, &mut aggr_infos, &grouping_exprs)?; - - self.set_distribution(self.get_relational_output(local_proj_id)?)?; - let finals_child_id = - self.add_final_groupby(local_proj_id, &grouping_exprs, &local_aliases_map)?; - - self.patch_finals( - &finals, - finals_child_id, - &local_aliases_map, - &aggr_infos, - gr_expr_map, - )?; - self.add_motion_to_2stage(&grouping_positions, finals_child_id, &finals)?; + Ok(()) + } + /// Create Motion nodes for scalar subqueries present under Having node. + fn fix_subqueries_under_having(&mut self, finals: &[NodeId]) -> Result<(), SbroadError> { let mut having_id: Option<NodeId> = None; - // skip Projection for node_id in finals.iter().skip(1).rev() { - self.set_distribution(self.get_relational_output(*node_id)?)?; if let Relational::Having(_) = self.get_relation_node(*node_id)? { having_id = Some(*node_id); } } - - if matches!( - self.get_relation_node(finals_child_id)?, - Relational::GroupBy(_) - ) { - self.set_distribution(self.get_relational_output(final_proj_id)?)?; - } else { - self.set_dist( - self.get_relational_output(final_proj_id)?, - Distribution::Single, - )?; - } - if let Some(having_id) = having_id { if let Relational::Having(Having { filter, output, .. }) = self.get_relation_node(having_id)? @@ -1577,6 +1060,150 @@ impl Plan { self.try_dist_from_subqueries(having_id, output)?; } } + Ok(()) + } + + /// Adds 2-stage aggregation and returns `true` if there are any aggregate + /// functions or `GroupBy` is present. Otherwise, returns `false` and + /// does nothing. 
+ pub fn add_two_stage_aggregation( + &mut self, + final_proj_id: NodeId, + ) -> Result<bool, SbroadError> { + let (finals, mut upper_id) = self.split_group_by(final_proj_id)?; + let mut groupby_info = + if matches!(self.get_relation_node(upper_id)?, Relational::GroupBy(_)) { + // In case user defined GroupBy in initial query. + // + // Example: `select a from t group by a`. + Some(GroupByInfo::new(upper_id)) + } else { + None + }; + + let mut aggrs = self.collect_aggregates(&finals)?; + + if groupby_info.is_none() && aggrs.is_empty() { + if let Some(groupby_id) = self.add_group_by_for_distinct(final_proj_id, upper_id)? { + // In case aggregates or GroupBy are present "distinct" qualifier under + // Projection doesn't add any new features to the plan. Otherwise, we should add + // a new GroupBy node for a local map stage. + // + // Example: `select distinct a, b + 42 from t`. + upper_id = groupby_id; + groupby_info = Some(GroupByInfo::new(upper_id)); + } else { + // Query doesn't contain GroupBy, aggregates or "distinct" qualifier. + // + // Example: `select a, b + 42 from t`. + return Ok(false); + } + } + + if groupby_info.is_none() { + self.check_refs_out_of_aggregates(&finals)?; + } + + let distinct_grouping_exprs = + self.collect_grouping_exprs_from_distinct_aggrs(&mut aggrs)?; + if groupby_info.is_none() && !distinct_grouping_exprs.is_empty() { + // GroupBy doesn't exist and we have to create it just for + // distinct aggregates. + // + // Example: `select sum(distinct a) from t` + + // grouping_exprs will be set few lines below. + let groupby_id = self.add_groupby(upper_id, &[], None)?; + upper_id = groupby_id; + groupby_info = Some(GroupByInfo::new(upper_id)); + } + + // Index for generating local grouping expressions aliases. + let mut local_alias_index = 1; + // Map of { grouping_expression -> (local_alias + parent_rel_id) }. 
+ let mut unique_grouping_expr_to_alias_map: OrderedMap<
+ GroupingExpression,
+ Rc<String>,
+ RepeatableState,
+ > = OrderedMap::with_capacity_and_hasher(GR_EXPR_CAPACITY, RepeatableState);
+ // Grouping expressions for local GroupBy.
+ let mut grouping_exprs_local = Vec::with_capacity(GR_EXPR_CAPACITY);
+ if let Some(groupby_info) = groupby_info.as_mut() {
+ // Leave only unique expressions under local GroupBy.
+ let gr_exprs = self.get_grouping_exprs(groupby_info.id)?;
+ for gr_expr in gr_exprs {
+ let local_alias = grouping_expr_local_alias(local_alias_index);
+ let new_expr = GroupingExpression::new(*gr_expr, self);
+ if !unique_grouping_expr_to_alias_map.contains_key(&new_expr) {
+ unique_grouping_expr_to_alias_map.insert(new_expr, local_alias);
+ local_alias_index += 1;
+ }
+ }
+ for (expr, _) in unique_grouping_expr_to_alias_map.iter() {
+ let expr_id = expr.id;
+ grouping_exprs_local.push(expr_id);
+ groupby_info.grouping_exprs.push(expr_id);
+ }
+
+ if !groupby_info.grouping_exprs.is_empty()
+ && self.check_bucket_id_under_group_by(&groupby_info.grouping_exprs)?
+ {
+ return Ok(false);
+ }
+ }
+
+ // Set local aggregate aliases for distinct aggregates. For non-distinct aggregates
+ // they will be set under `add_local_aggregates`.
+ for (gr_expr, aggr) in distinct_grouping_exprs {
+ let new_expr = GroupingExpression::new(gr_expr, self);
+ if let Some(local_alias) = unique_grouping_expr_to_alias_map.get(&new_expr) {
+ aggr.lagg_aliases.insert(aggr.kind, local_alias.clone());
+ } else {
+ let local_alias = grouping_expr_local_alias(local_alias_index);
+ local_alias_index += 1;
+ aggr.lagg_aliases.insert(aggr.kind, local_alias.clone());
+
+ // Add expressions used as arguments to distinct aggregates to local `GroupBy`.
+ // + // E.g: For query below, we should add b*b to local `GroupBy` + // `select a, sum(distinct b*b), count(c) from t group by a` + // Map: `select a as l1, b*b as l2, count(c) as l3 from t group by a, b, b*b` + // Reduce: `select l1, sum(distinct l2), sum(l3) from tmp_space group by l1` + grouping_exprs_local.push(gr_expr); + unique_grouping_expr_to_alias_map.insert(new_expr, local_alias); + } + } + + if let Some(groupby_info) = groupby_info.as_mut() { + for (expr, local_alias) in unique_grouping_expr_to_alias_map.iter() { + groupby_info + .grouping_expr_to_alias_map + .insert(expr.id, local_alias.clone()); + } + + self.set_grouping_exprs(groupby_info.id, grouping_exprs_local)?; + self.fill_gr_exprs_map(&finals, groupby_info)?; + } + + if let Some(groupby_info) = &groupby_info { + self.set_distribution(self.get_relational_output(groupby_info.id)?)?; + } + + let local_proj_id = self.add_local_projection(upper_id, &mut aggrs, &mut groupby_info)?; + let finals_child_id = if let Some(groupby_info) = groupby_info.as_ref() { + if groupby_info.grouping_exprs.is_empty() { + local_proj_id + } else { + self.add_final_groupby(local_proj_id, groupby_info)? 
+ } + } else { + local_proj_id + }; + self.patch_finals(&finals, finals_child_id, &aggrs, &groupby_info)?; + + self.add_motion_to_two_stage(&groupby_info, finals_child_id, &finals)?; + + self.fix_subqueries_under_having(&finals)?; Ok(true) } diff --git a/sbroad/sbroad-core/src/ir/transformation/redistribution/tests.rs b/sbroad/sbroad-core/src/ir/transformation/redistribution/tests.rs index 32fdbdf8b9be0ecec953a2d324b85f1c739bdd60..750614e5b924910baf8c06067f45e6c3dc3896b4 100644 --- a/sbroad/sbroad-core/src/ir/transformation/redistribution/tests.rs +++ b/sbroad/sbroad-core/src/ir/transformation/redistribution/tests.rs @@ -249,9 +249,9 @@ fn test_slices_2() { let plan = sql_to_optimized_ir(query, vec![]); insta::assert_snapshot!(plan.as_explain().unwrap(), @r#" - projection (sum(("count_5596"::unsigned))::unsigned -> "col_1") + projection (sum(("count_1"::unsigned))::unsigned -> "col_1") motion [policy: full] - projection (count(("t2"."e"::unsigned))::unsigned -> "count_5596") + projection (count(("t2"."e"::unsigned))::unsigned -> "count_1") join on true::boolean scan projection ("t2"."f"::unsigned -> "f") diff --git a/sbroad/sbroad-core/src/utils.rs b/sbroad/sbroad-core/src/utils.rs index 8f913890f90e42f01b34c98e74dbcad34bc6b384..5ac66195f6aab1c99e212db6ad27eb5b22a8250c 100644 --- a/sbroad/sbroad-core/src/utils.rs +++ b/sbroad/sbroad-core/src/utils.rs @@ -132,6 +132,10 @@ impl<K: Clone + Hash + Eq, V: Clone, S: BuildHasher> OrderedMap<K, V, S> { self.map.get(key) } + pub fn contains_key(&self, key: &K) -> bool { + self.map.contains_key(key) + } + pub fn remove(&mut self, key: &K) -> Option<V> { self.order.retain(|(k, _)| k != key); self.map.remove(key) @@ -165,9 +169,9 @@ impl<'set, V: Clone + Hash + Eq, S: BuildHasher> Iterator for OrderedSetIterator } impl<V: Clone + Hash + Eq, S: BuildHasher> OrderedSet<V, S> { - pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self { + pub fn with_hasher(hasher: S) -> Self { Self { - map: OrderedMap::<V, (), 
S>::with_capacity_and_hasher(capacity, hasher), + map: OrderedMap::<V, (), S>::with_hasher(hasher), } } @@ -175,6 +179,10 @@ impl<V: Clone + Hash + Eq, S: BuildHasher> OrderedSet<V, S> { self.map.len() } + pub fn contains_key(&self, key: &V) -> bool { + self.map.contains_key(key) + } + pub fn is_empty(&self) -> bool { self.map.is_empty() } diff --git a/test/int/sql/expression.sql b/test/int/sql/expression.sql index e26fd452c5a9c3b4d9db938fa9ff38fb14b29a43..4e3d66dfe0dcb6a08e5a14d39ff17a81add5e7d1 100644 --- a/test/int/sql/expression.sql +++ b/test/int/sql/expression.sql @@ -6,6 +6,11 @@ INSERT INTO t VALUES(1, 1); INSERT INTO t VALUES(2, 1); INSERT INTO t VALUES(3, 2); INSERT INTO t VALUES(4, 3); +DROP TABLE IF EXISTS tb; +CREATE TABLE tb(a INT PRIMARY KEY, b BOOLEAN); +INSERT INTO tb VALUES(1, true); +INSERT INTO tb VALUES(2, true); +INSERT INTO tb VALUES(3, false); -- TEST: reference-under-case-expression -- SQL: @@ -185,4 +190,22 @@ false -- SQL: SELECT FALSE AND (FALSE OR TRUE) -- EXPECTED: -false \ No newline at end of file +false + +-- TEST: having-with-boolean-column +-- SQL: +SELECT sum(a) FROM tb GROUP BY b HAVING b; +-- EXPECTED: +3 + +-- TEST: select-distinct-asterisk +-- SQL: +SELECT DISTINCT * FROM t ORDER BY 1 +-- EXPECTED: +1, 1, 2, 1, 3, 2, 4, 3 + +-- TEST: select-asterisk-with-group-by +-- SQL: +SELECT * FROM t GROUP BY a, b ORDER BY 1 +-- EXPECTED: +1, 1, 2, 1, 3, 2, 4, 3