diff --git a/CHANGELOG.md b/CHANGELOG.md index 37dfe6ac2cf848a3bc63f9e27785df7226448303..7096323bc177b8c24c0124ba732673375c1a9822 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -90,6 +90,10 @@ to 2 and 3. - `DROP PLUGIN` now leaves the plugin's data in the database if `WITH DATA` wasn't specified. Previously we would return an error instead. +- Block and end-of-lines comments in SQL statements are now allowed. + +- Punctuation in unquoted identifiers is no longer allowed. + ## [24.6.1] - 2024-10-28 ### Configuration diff --git a/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs b/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs index 7b22c7708077c73c70fd35fb2171c2f4972cee23..091a3ccc3231a0989a24f375040c90bb6167f410 100644 --- a/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs +++ b/sbroad/sbroad-core/src/frontend/sql/ir/tests.rs @@ -1,6 +1,7 @@ use crate::errors::SbroadError; use crate::executor::engine::mock::RouterConfigurationMock; use crate::frontend::sql::ast::{AbstractSyntaxTree, ParseTree, Rule}; +use crate::frontend::sql::ParsingPairsMap; use crate::frontend::Ast; use crate::ir::node::relational::Relational; use crate::ir::node::NodeId; @@ -10,6 +11,7 @@ use crate::ir::value::Value; use crate::ir::{Plan, Positions}; use pest::Parser; use pretty_assertions::assert_eq; +use std::collections::HashMap; use time::{format_description, OffsetDateTime, Time}; fn sql_to_optimized_ir_add_motions_err(query: &str) -> SbroadError { @@ -693,13 +695,13 @@ execution options: fn front_sql_check_arbitraty_utf_in_identifiers() { let input = r#"SELECT "id" "from", "id" as "select", "id" "123»&%ښ۞@Ƶǖselect.""''\\" - , "id" aц1&@$Ƶǖ%^&«»§&%ښ۞@Ƶǖ FROM "test_space" &%ښ۞@Ƶǖ"#; + , "id" Бу_3, "id" aц1ƵǖښƵǖ FROM "test_space" ښƵǖ"#; let plan = sql_to_optimized_ir(input, vec![]); let expected_explain = String::from( - r#"projection ("&%ښ۞@ƶǖ"."id"::unsigned -> "from", "&%ښ۞@ƶǖ"."id"::unsigned -> "select", "&%ښ۞@ƶǖ"."id"::unsigned -> "123»&%ښ۞@Ƶǖselect.""''\\", "&%ښ۞@ƶǖ"."id"::unsigned -> "aц1&@$ƶǖ%^&«»§&%ښ۞@ƶǖ") - scan "test_space" -> "&%ښ۞@ƶǖ" + r#"projection ("ښƶǖ"."id"::unsigned -> "from", "ښƶǖ"."id"::unsigned -> "select", "ښƶǖ"."id"::unsigned -> "123»&%ښ۞@Ƶǖselect.""''\\", "ښƶǖ"."id"::unsigned -> "бу_3", "ښƶǖ"."id"::unsigned -> "aц1ƶǖښƶǖ") + scan "test_space" -> "ښƶǖ" execution options: sql_vdbe_opcode_max = 45000 sql_motion_row_max = 5000 @@ -4342,6 +4344,85 @@ fn front_sql_whitespaces_are_not_ignored() { } } +#[track_caller] +fn syntax_error(query: &str) -> impl '_ + FnOnce(SbroadError) { + move |err| { + eprintln!("{err}"); + eprintln!("QUERY[[{query}]]END QUERY"); + panic!("syntax error"); + } +} + +#[test] +fn test_comment_parsing() { + let comment_texts = ["", "doodle", "select * from $$@"]; + // FIXME: some queries parse differently with comments in specific places. + let queries = [ + "select * from foo", + "select 1", + "select * from foo", + r#"set value to key"#, + // r#"set transaction isolation level read commited"#, + r#"grant create on user vasya to emir option(timeout=1)"#, + r#"alter plugin abc 0.1.0 remove service svc1 from tier tier1 option(timeout=11)"#, + // r#"create table if not exists t(a int primary key,b int) using memtx distributed by(a,b) wait applied locally option(timeout=1)"#, + // r#"with cte1(a,b) as(select * from t),cte2 as(select * from t) select * from t join t on true group by a having b order by a union all select * from t"#, + r#"select cast(1 as int) or not exists (values(true)) and 1+1 and true or (a in (select * from t)) and i is not null"#, + ]; + + for query in queries { + let mut map1 = ParsingPairsMap::new(); + let mut map2 = HashMap::new(); + let mut map3 = HashMap::new(); + let mut standard_parse = AbstractSyntaxTree::empty(); + standard_parse + .fill(query, &mut map1, &mut map2, &mut map3) + .unwrap_or_else(syntax_error(query)); + + let bytes = query.as_bytes(); + // We compute the word boundaries where comments may be inserted in a hacky way. + // This doesn't handle more complex cases, like quoted identifiers, strings or procedures. + let mut word_boundaries = vec![0]; + for n in 1..query.len() { + let is_boundary = |c: u8| + // Whitespace is always a boundary, unless within quoted strings. + // We don't properly handle quotes in this test, so quoted strings/idents + // should be absent from test cases. + c.is_ascii_whitespace() + // Same as with whitespace. Dots are excluded, because they happen in + // versions, dates and field accesses. + || (c.is_ascii_punctuation() && c != b'.'); + if is_boundary(bytes[n - 1]) || is_boundary(bytes[n]) { + word_boundaries.push(n); + } + } + word_boundaries.push(query.len()); + + for boundary in word_boundaries { + for comment in comment_texts { + let get_commented = |prefix: &str, suffix: &str| { + format!( + "{}{prefix}{comment}{suffix}{}", + &query[..boundary], + query.get(boundary..).unwrap_or(""), + ) + }; + for commented_query in [get_commented("--", "\n"), get_commented("/*", "*/")] { + let mut map1 = ParsingPairsMap::new(); + let mut map2 = HashMap::new(); + let mut map3 = HashMap::new(); + let mut parsed = AbstractSyntaxTree::empty(); + parsed + .fill(&commented_query, &mut map1, &mut map2, &mut map3) + .unwrap_or_else(syntax_error(&commented_query)); + + assert_eq!(parsed, standard_parse, "Query: {commented_query}"); + } + } + } + } +} + #[cfg(test)] mod coalesce; #[cfg(test)] diff --git a/sbroad/sbroad-core/src/frontend/sql/query.pest b/sbroad/sbroad-core/src/frontend/sql/query.pest index e0de518afda836de227deb35747e0a0997177986..946a7506e2faf0343aa284ef1d251b80e1b23cfb 100644 --- a/sbroad/sbroad-core/src/frontend/sql/query.pest +++ b/sbroad/sbroad-core/src/frontend/sql/query.pest @@ -21,7 +21,7 @@ Plugin = _{ CreatePlugin | DropPlugin | AlterPlugin } EnablePlugin = ${ PluginVersion ~ W ~ ^"enable" ~ (W ~ TimeoutOption)? } DisablePlugin = ${ PluginVersion ~ W ~ ^"disable" ~ (W ~ TimeoutOption)? } MigrateTo = ${ ^"migrate" ~ W ~ ^"to" ~ W ~ PluginVersion ~ (W ~ MigrateUpOption)? } - RollbackTimeout = !{ ^"rollback_timeout" ~ "=" ~ Duration } + RollbackTimeout = !{ ^"rollback_timeout" ~ WO ~ "=" ~ WO ~ Duration } MigrateUpOptionParam = _{ Timeout | RollbackTimeout } MigrateUpOption = _{ ^"option" ~ WO ~ "(" ~ WO ~ MigrateUpOptionParams ~ WO ~ ")" } MigrateUpOptionParams = _{ MigrateUpOptionParam ~ (WO ~ "," ~ WO ~ MigrateUpOptionParam)* } @@ -89,7 +89,7 @@ DDL = _{ CreateTable | DropTable | CreateIndex | DropIndex | CreateSchema | Drop ForValuesSpec = _{ ForValuesSpecIn | ForValuesSpecFromTo | ForValuesSpecWith } ForValuesSpecIn = { ^"in" ~ W ~ Row } ForValuesSpecFromTo = ${ ^"from" ~ W ~ ForValuesSpecFromToRow ~ W ~ ^"to" ~ W ~ ForValuesSpecFromToRow } - ForValuesSpecFromToRow = !{ "(" ~ ExprOrMinMax ~ ("," ~ ExprOrMinMax)* ~ ")" } + ForValuesSpecFromToRow = !{ "(" ~ WO ~ ExprOrMinMax ~ WO ~ (WO ~ "," ~ WO ~ ExprOrMinMax)* ~ WO ~ ")" } ExprOrMinMax = { MinValue | MaxValue | Expr } MinValue = { ^"minvalue" } MaxValue = { ^"maxvalue" } @@ -100,7 +100,7 @@ DDL = _{ CreateTable | DropTable | CreateIndex | DropIndex | CreateSchema | Drop (W ~ Engine)? ~ (W ~ Distribution)? ~ (W ~ WaitApplied)? ~ (W ~ Partition)? ~ (W ~ TimeoutOption)? } NewTable = @{Table} - Columns = !{ ColumnDef ~ ("," ~ ColumnDef)* } + Columns = !{ ColumnDef ~ WO ~ (WO ~ "," ~ WO ~ ColumnDef)* } ColumnDef = ${ Identifier ~ W ~ ColumnDefType ~ (W ~ ColumnDefIsNull)? ~ (W ~ PrimaryKeyMark)? } ColumnDefIsNull = { (NotFlag ~ W)? ~ ^"null" } PrimaryKeyMark = { ^"primary" ~ W ~ ^"key" } @@ -122,7 +122,7 @@ DDL = _{ CreateTable | DropTable | CreateIndex | DropIndex | CreateSchema | Drop PartitionRange = { ^"range" } PartitionList = { ^"list" } PartitionHash = { ^"hash" } - PartitionBySpec = !{ "(" ~ Identifier ~ ("," ~ Identifier)* ~ ")" } + PartitionBySpec = !{ "(" ~ WO ~ Identifier ~ WO ~ (WO ~ "," ~ WO ~ Identifier)* ~ WO ~ ")" } DropTable = ${ ^"drop" ~ W ~ ^"table" ~ W ~ (IfExists ~ W)? ~ Table ~ (W ~ WaitApplied)? ~ (W ~ TimeoutOption)? } CreateProc = ${ ^"create" ~ W ~ ^"procedure" ~ W ~ (IfNotExists ~ W)? ~ Identifier ~ WO @@ -130,14 +130,14 @@ DDL = _{ CreateTable | DropTable | CreateIndex | DropIndex | CreateSchema | Drop ~ ((^"as" ~ W ~ "$$" ~ WO ~ ProcBody ~ WO ~ "$$") | (^"begin" ~ W ~ "atomic" ~ W ~ ProcBody ~ W ~ "end")) ~ (W ~ WaitApplied)? ~ (W ~ TimeoutOption)? } - ProcParams = !{ "(" ~ ProcParamsColumnDefTypes? ~ ")" } + ProcParams = !{ "(" ~ WO ~ ProcParamsColumnDefTypes? ~ WO ~ ")" } ProcParamsColumnDefTypes = _{ ColumnDefType ~ (WO ~ "," ~ WO ~ ColumnDefType)* } ProcLanguage = { SQL } SQL = { ^"sql" } ProcBody = { (Insert | Update | Delete) } DropProc = ${ ^"drop" ~ W ~ ^"procedure" ~ W ~ (IfExists ~ W)? ~ ProcWithOptionalParams ~ (W ~ WaitApplied)? ~ (W ~ TimeoutOption)? } - ProcWithOptionalParams = ${ Identifier ~ ProcParams? } + ProcWithOptionalParams = ${ Identifier ~ (WO ~ ProcParams)? } RenameProc = ${ ^"alter" ~ W ~ ^"procedure" ~ W ~ OldProc ~ (ProcParams)? ~ W ~ ^"rename" ~ W ~ ^"to" ~ W ~ NewProc ~ (W ~ WaitApplied)? ~ (W ~ TimeoutOption)? } @@ -155,21 +155,21 @@ DDL = _{ CreateTable | DropTable | CreateIndex | DropIndex | CreateSchema | Drop Hash = { ^"hash" } RTree = { ^"rtree" } BitSet = { ^"bitset" } - Parts = !{ Identifier ~ ("," ~ Identifier)* } + Parts = !{ Identifier ~ WO ~ (WO ~ "," ~ WO ~ Identifier)* } IndexOptions = ${ ^"with" ~ WO ~ "(" ~ WO ~ IndexOptionsParams ~ WO ~ ")" } IndexOptionsParams = _{ IndexOptionParam ~ (WO ~ "," ~ WO ~ IndexOptionParam)* } IndexOptionParam = { BloomFpr | PageSize | RangeSize | RunCountPerLevel | RunSizeRatio | Dimension | Distance | Hint } - BloomFpr = !{ ^"bloom_fpr" ~ "=" ~ Decimal } - PageSize = !{ ^"page_size" ~ "=" ~ Unsigned } - RangeSize = !{ ^"range_size" ~ "=" ~ Unsigned } - RunCountPerLevel = !{ ^"run_count_per_level" ~ "=" ~Unsigned } - RunSizeRatio = !{ ^"run_size_ratio" ~ "=" ~ Decimal } - Dimension = !{ ^"dimension" ~ "=" ~ Unsigned } - Distance = !{ ^"distance" ~ "=" ~ (Euclid | Manhattan) } + BloomFpr = !{ ^"bloom_fpr" ~ WO ~ "=" ~ WO ~ Decimal } + PageSize = !{ ^"page_size" ~ WO ~ "=" ~ WO ~ Unsigned } + RangeSize = !{ ^"range_size" ~ WO ~ "=" ~ WO ~ Unsigned } + RunCountPerLevel = !{ ^"run_count_per_level" ~ WO ~ "=" ~ WO ~ Unsigned } + RunSizeRatio = !{ ^"run_size_ratio" ~ WO ~ "=" ~ WO ~ Decimal } + Dimension = !{ ^"dimension" ~ WO ~ "=" ~ WO ~ Unsigned } + Distance = !{ ^"distance" ~ WO ~ "=" ~ WO ~ (Euclid | Manhattan) } Euclid = { ^"euclid" } Manhattan = { ^"manhattan" } - Hint = !{ ^"hint" ~ "=" ~ (True | False) } + Hint = !{ ^"hint" ~ WO ~ "=" ~ WO ~ (True | False) } DropIndex = ${ ^"drop" ~ W ~ ^"index" ~ W ~ (IfExists ~ W)? ~ Identifier ~ (W ~ WaitApplied)? ~ (W ~ TimeoutOption)? } CreateSchema = ${ ^"create" ~ W ~ ^"schema" ~ W ~ (IfNotExists ~ W)? ~ Identifier } @@ -214,7 +214,7 @@ DDL = _{ CreateTable | DropTable | CreateIndex | DropIndex | CreateSchema | Drop Block = ${ CallProc ~ (W ~ DqlOption)? } CallProc = ${ ^"call"~ W ~ Identifier ~ WO ~ "(" ~ WO ~ ProcValues ~ WO ~ ")" } - ProcValues = !{ (ProcValue ~ ("," ~ ProcValue)*)? } + ProcValues = !{ (ProcValue ~ WO ~ (WO ~ "," ~ WO ~ ProcValue)*)? } ProcValue = _{ Literal | Parameter } ExplainQuery = _{ Explain } @@ -232,7 +232,7 @@ Query = { (SelectFull | Values | Insert | Update | Delete) ~ DqlOption? } ExceptOp = @{ (^"except" ~ W ~ ^"distinct") | ^"except" } UnionAllOp = @{ ^"union" ~ W ~ ^"all" } Cte = ${ Identifier ~ (WO ~ CteColumns)? ~ W ~ ^"as" ~ WO ~ "(" ~ WO ~ (SelectStatement | Values) ~ WO ~ ")" } - CteColumns = _{ "(" ~ CteColumn ~ (WO ~ "," ~ WO ~ CteColumn)* ~ ")" } + CteColumns = _{ "(" ~ WO ~ CteColumn ~ (WO ~ "," ~ WO ~ CteColumn)* ~ WO ~ ")" } CteColumn = @{ Identifier } Select = ${ ^"select" ~ W ~ Projection ~ (W ~ SelectMainBody)? } SelectMainBody = _{ ^"from" ~ W ~ Scan ~ (W ~ Join)* ~ @@ -260,43 +260,37 @@ Query = { (SelectFull | Values | Insert | Update | Delete) ~ DqlOption? } OrderFlag = _{ Asc | Desc } Asc = { ^"asc" } Desc = { ^"desc" } - SubQuery = !{ "(" ~ (SelectFull | Values) ~ ")" } + SubQuery = !{ "(" ~ WO ~ (SelectFull | Values) ~ WO ~ ")" } Insert = ${ ^"insert" ~ W ~ ^"into" ~ W ~ Table ~ WO ~ (TargetColumns ~ W)? ~ (SelectFull | Values) ~ (W ~ OnConflict)? } - TargetColumns = !{ "(" ~ Identifier ~ ("," ~ Identifier)* ~ ")" } + TargetColumns = !{ "(" ~ WO ~ Identifier ~ (WO ~ "," ~ WO ~ Identifier)* ~ WO ~ ")" } OnConflict = _{ ^"on" ~ W ~ ^"conflict" ~ W ~ ^"do" ~ W ~ (DoNothing | DoReplace | DoFail) } DoReplace = { ^"replace" } DoNothing = { ^"nothing" } DoFail = { ^"fail" } Update = ${ ^"update" ~ W ~ ScanTable ~ W ~ ^"set" ~ W ~ UpdateList ~ (W ~ (UpdateFrom | WhereClause))? } UpdateList = { UpdateItem ~ (WO ~ "," ~ WO ~ UpdateItem)* } - UpdateItem = !{ Identifier ~ "=" ~ Expr } + UpdateItem = !{ Identifier ~ WO ~ "=" ~ WO ~ Expr } UpdateFrom = _{ ^"from" ~ W ~ Scan ~ (W ~ ^"where" ~ W ~ Expr)? } Values = { ^"values" ~ WO ~ ValuesRows } ValuesRows = _{ Row ~ (WO ~ "," ~ WO ~ Row)* } - DqlOption = !{ ^"option" ~ "(" ~ OprionParams ~ ")" } - OprionParams = _{ OptionParam ~ (WO ~ "," ~ WO ~ OptionParam)* } + DqlOption = !{ ^"option" ~ WO ~ "(" ~ WO ~ OptionParams ~ WO ~ ")" } + OptionParams = _{ OptionParam ~ (WO ~ "," ~ WO ~ OptionParam)* } OptionParam = _{ VdbeOpcodeMax | MotionRowMax } - Timeout = !{ ^"timeout" ~ "=" ~ Duration } + Timeout = !{ ^"timeout" ~ WO ~ "=" ~ WO ~ Duration } Duration = @{ Unsigned ~ ("." ~ Unsigned)? } TimeoutOption = _{ ^"option" ~ WO ~ "(" ~ WO ~ Timeout ~ WO ~ ")" } - VdbeOpcodeMax = { ^"sql_vdbe_opcode_max" ~ "=" ~ (Unsigned | Parameter) } - MotionRowMax = { ^"sql_motion_row_max" ~ "=" ~ (Unsigned | Parameter) } + VdbeOpcodeMax = { ^"sql_vdbe_opcode_max" ~ WO ~ "=" ~ WO ~ (Unsigned | Parameter) } + MotionRowMax = { ^"sql_motion_row_max" ~ WO ~ "=" ~ WO ~ (Unsigned | Parameter) } Delete = ${ ^"delete" ~ W ~ ^"from" ~ W ~ ScanTable ~ (W ~ ^"where" ~ W ~ DeleteFilter)? } DeleteFilter = { Expr } Identifier = @{ DelimitedIdentifier | RegularIdentifier } DelimitedIdentifier = @{ ("\"" ~ ((!("\"") ~ ANY) | "\"\"")* ~ "\"") } RegularIdentifier = @{ !KeywordCoverage ~ - RegularIdentifierFirstApplicableSymbol ~ - RegularIdentifierApplicableSymbol* ~ - &IdentifierInapplicableSymbol } - RegularIdentifierFirstApplicableSymbol = { !(IdentifierInapplicableSymbol | ASCII_DIGIT) ~ ANY } - RegularIdentifierApplicableSymbol = { !IdentifierInapplicableSymbol ~ ANY } - IdentifierInapplicableSymbol = { WHITESPACE | "." | "," | "(" | EOF | ")" | "\"" | ":" - | "'" | ArithInfixOp | ConcatInfixOp | NotEq | GtEq - | Gt | LtEq | Lt | Eq } - KeywordCoverage = { Keyword ~ IdentifierInapplicableSymbol } + (XID_START | "_" | "#" ) ~ + (XID_CONTINUE | "_" | "#" )* } + KeywordCoverage = { Keyword ~ !(XID_CONTINUE | "_" | "#" ) } // Note: In case two keywords with the same prefix are met, shorter ones must go after longest. // E.g. ^"in" must go after ^"insert" because keywords traversal stops on the first match. // Please, try to keep the list in alphabetical order. @@ -451,7 +445,9 @@ NotFlag = { ^"not" } // REPEATING_RULES = _{ REPEATING_RULE ~ (WO ~ "," ~ WO ~ REPEATING_RULE)* } // * Note: Keep in mind that rule like `!{ RULE_1 ~ ("," ~ RULE_2)* }` will consume possible // whitespaces that follow the `RULE_1` rule. -WHITESPACE = _{ " " | "\t" | "\n" | "\r\n" } +EOL_COMMENT = @{ "--" ~ (!NEWLINE ~ ANY)* ~ NEWLINE? } +BLOCK_COMMENT = @{ "/*" ~ (!("*/") ~ ANY)* ~ "*/" } +WHITESPACE = _{ " "+ | NEWLINE | "\t" | EOL_COMMENT | BLOCK_COMMENT } W = _{ WHITESPACE+ } WO = _{ WHITESPACE* } EOF = { EOI | ";" }