Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
picodata
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Container Registry
Model registry
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
core
picodata
Commits
a1e824eb
Commit
a1e824eb
authored
10 months ago
by
Georgy Moshkin
Browse files
Options
Downloads
Patches
Plain Diff
fix: bugs in picodata::util::Lexer
parent
4e639e51
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!1023
feat: rework migration file parsing
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/util.rs
+83
-16
83 additions, 16 deletions
src/util.rs
with
83 additions
and
16 deletions
src/util.rs
+
83
−
16
View file @
a1e824eb
...
...
@@ -727,6 +727,7 @@ pub struct Lexer<'a> {
input
:
&
'a
str
,
utf8_stream
:
std
::
iter
::
Peekable
<
std
::
str
::
CharIndices
<
'a
>>
,
last_token
:
Option
<
TokenInfo
<
'a
>>
,
last_token_was_peeked
:
bool
,
}
#[derive(Debug,
Clone,
Copy,
PartialEq,
Eq,
Hash)]
...
...
@@ -744,19 +745,25 @@ impl<'a> Lexer<'a> {
utf8_stream
:
input
.char_indices
()
.peekable
(),
input
,
last_token
:
None
,
last_token_was_peeked
:
false
,
}
}
#[inline(always)]
pub
fn
peek_token
(
&
mut
self
)
->
Option
<&
TokenInfo
<
'a
>>
{
if
self
.last_token
.is_some
()
{
self
.last_token
.as_ref
()
}
else
{
self
.next_token
()
if
!
self
.last_token_was_peeked
{
self
.next_token
();
self
.last_token_was_peeked
=
true
;
}
self
.last_token
.as_ref
()
}
pub
fn
next_token
(
&
mut
self
)
->
Option
<&
TokenInfo
<
'a
>>
{
if
self
.last_token_was_peeked
{
self
.last_token_was_peeked
=
false
;
return
self
.last_token
.as_ref
();
}
// skip leading whitespace
while
let
Some
(
&
(
_
,
c
))
=
self
.utf8_stream
.peek
()
{
if
!
c
.is_whitespace
()
{
...
...
@@ -774,27 +781,30 @@ impl<'a> Lexer<'a> {
let
mut
utf8_count
=
1
;
match
c
{
c
if
c
.
is_alphanumeric
(
)
=>
{
c
if
is_alphanumeric
_or_underscore
(
c
)
=>
{
while
let
Some
(
&
(
i
,
c
))
=
self
.utf8_stream
.peek
()
{
if
!
c
.
is_alphanumeric
(
)
{
if
!
is_alphanumeric
_or_underscore
(
c
)
{
return
Some
(
self
.update_last_token
(
start
,
i
,
utf8_count
));
}
utf8_count
+=
1
;
_
=
self
.utf8_stream
.next
()
.
unwrap
(
);
_
=
self
.utf8_stream
.next
()
.
expect
(
"peek returned Some"
);
}
// in case the stream ended, fall through to handle it at the end
}
'"'
=>
{
while
let
Some
((
_
,
c
))
=
self
.utf8_stream
.next
()
{
'"'
|
'\''
if
self
.quote_escaping_style
==
QuoteEscapingStyle
::
Backslash
=>
{
let
openning_quote
=
c
;
while
let
Some
((
i
,
c
))
=
self
.utf8_stream
.next
()
{
utf8_count
+=
1
;
if
c
==
'\\'
{
// next character is escaped, so always added to the token
utf8_count
+=
1
;
_
=
self
.utf8_stream
.next
()
.unwrap
();
}
else
if
c
==
'"'
{
let
&
(
i
,
_
)
=
self
.utf8_stream
.peek
()
.unwrap
();
return
Some
(
self
.update_last_token
(
start
,
i
,
utf8_count
));
if
self
.utf8_stream
.next
()
.is_some
()
{
utf8_count
+=
1
;
}
else
{
// end of input
}
}
else
if
c
==
openning_quote
{
let
end
=
i
+
c
.len_utf8
();
return
Some
(
self
.update_last_token
(
start
,
end
,
utf8_count
));
}
}
// in case the stream ended, fall through to handle it at the end
...
...
@@ -821,6 +831,11 @@ impl<'a> Lexer<'a> {
}
}
/// Returns `true` when `c` is an underscore (`'_'`) or a character for which
/// [`char::is_alphanumeric`] holds; returns `false` for everything else.
#[inline(always)]
fn is_alphanumeric_or_underscore(c: char) -> bool {
    match c {
        // The underscore is accepted explicitly since it is not alphanumeric.
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
////////////////////////////////////////////////////////////////////////////////
// ...
////////////////////////////////////////////////////////////////////////////////
...
...
@@ -1010,7 +1025,13 @@ mod tests {
#[test]
fn
lexer
()
{
let
mut
lexer
=
Lexer
::
new
(
r##"foo bar1 3baz " \" ' " ,,) "unfinished"##
);
//
//
//
let
mut
lexer
=
Lexer
::
new
(
r##"foo bar1 3baz " \" ' " 'single\'quotes' 'double"in singles',,) "unfinished"##
,
);
let
mut
tokens
=
vec!
[];
while
let
Some
(
token
)
=
lexer
.next_token
()
.copied
()
{
...
...
@@ -1026,6 +1047,8 @@ mod tests {
"bar1"
,
"3baz"
,
"
\"
\\\"
'
\"
"
,
"'single
\\
'quotes'"
,
"'double
\"
in singles'"
,
","
,
","
,
")"
,
...
...
@@ -1033,6 +1056,9 @@ mod tests {
]
);
//
//
//
let
mut
lexer
=
Lexer
::
new
(
" alphanumeric"
);
let
mut
tokens
=
vec!
[];
...
...
@@ -1043,5 +1069,46 @@ mod tests {
}
assert_eq!
(
tokens
,
[
"alphanumeric"
]);
//
//
//
let
mut
lexer
=
Lexer
::
new
(
"quotes_at_the_end''"
);
let
mut
tokens
=
vec!
[];
while
let
Some
(
token
)
=
lexer
.next_token
()
.copied
()
{
assert_eq!
(
token
.text
,
&
lexer
.input
[
token
.start
..
token
.end
]);
assert_eq!
(
token
.utf8_count
,
dbg!
(
token
.text
)
.chars
()
.count
());
tokens
.push
(
token
.text
);
}
assert_eq!
(
tokens
,
[
"quotes_at_the_end"
,
"''"
]);
//
//
//
let
mut
lexer
=
Lexer
::
new
(
"backslash_at_the_end'
\\
"
);
let
mut
tokens
=
vec!
[];
while
let
Some
(
token
)
=
lexer
.next_token
()
.copied
()
{
assert_eq!
(
token
.text
,
&
lexer
.input
[
token
.start
..
token
.end
]);
assert_eq!
(
token
.utf8_count
,
dbg!
(
token
.text
)
.chars
()
.count
());
tokens
.push
(
token
.text
);
}
assert_eq!
(
tokens
,
[
"backslash_at_the_end"
,
"'
\\
"
]);
//
//
//
let
mut
lexer
=
Lexer
::
new
(
"foo bar"
);
assert_eq!
(
lexer
.next_token
()
.unwrap
()
.text
,
"foo"
);
assert_eq!
(
lexer
.peek_token
()
.unwrap
()
.text
,
"bar"
);
assert_eq!
(
lexer
.peek_token
()
.unwrap
()
.text
,
"bar"
);
assert_eq!
(
lexer
.next_token
()
.unwrap
()
.text
,
"bar"
);
assert!
(
lexer
.peek_token
()
.is_none
());
assert!
(
lexer
.peek_token
()
.is_none
());
assert!
(
lexer
.next_token
()
.is_none
());
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment