Skip to content

Commit df0d56c

Browse files
authored
Fix the tokenization of < edge cases (#2280)
1 parent 7c4eac3 commit df0d56c

File tree

1 file changed

+102
-8
lines changed

src/tokenizer.rs

Lines changed: 102 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1627,6 +1627,9 @@ impl<'a> Tokenizer<'a> {
16271627
chars.next();
16281628
match chars.peek() {
16291629
Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship),
1630+
// `<=+` and `<=-` are not valid combined operators; treat `<=` as
1631+
// the operator and leave `+`/`-` to be tokenized separately.
1632+
Some('+') | Some('-') => Ok(Some(Token::LtEq)),
16301633
_ => self.start_binop(chars, "<=", Token::LtEq),
16311634
}
16321635
}
@@ -1646,13 +1649,15 @@ impl<'a> Tokenizer<'a> {
16461649
}
16471650
}
16481651
Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft),
1652+
// `<+` is not a valid combined operator; treat `<` as the operator
1653+
// and leave `+` to be tokenized separately.
1654+
Some('+') => Ok(Some(Token::Lt)),
16491655
Some('-') if self.dialect.supports_geometric_types() => {
1650-
chars.next(); // consume
1651-
match chars.peek() {
1652-
Some('>') => {
1653-
self.consume_for_binop(chars, "<->", Token::TwoWayArrow)
1654-
}
1655-
_ => self.start_binop_opt(chars, "<-", None),
1656+
if chars.peekable.clone().nth(1) == Some('>') {
1657+
chars.next(); // consume `-`
1658+
self.consume_for_binop(chars, "<->", Token::TwoWayArrow)
1659+
} else {
1660+
Ok(Some(Token::Lt))
16561661
}
16571662
}
16581663
Some('^') if self.dialect.supports_geometric_types() => {
@@ -2628,9 +2633,10 @@ fn take_char_from_hex_digits(
26282633
mod tests {
26292634
use super::*;
26302635
use crate::dialect::{
2631-
BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect,
2636+
BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect,
2637+
PostgreSqlDialect, SQLiteDialect,
26322638
};
2633-
use crate::test_utils::{all_dialects_except, all_dialects_where};
2639+
use crate::test_utils::{all_dialects, all_dialects_except, all_dialects_where};
26342640
use core::fmt::Debug;
26352641

26362642
#[test]
@@ -4420,4 +4426,92 @@ mod tests {
44204426
tokens,
44214427
);
44224428
}
4429+
4430+
#[test]
4431+
fn tokenize_lt() {
4432+
all_dialects().tokenizes_to(
4433+
"select a <-50",
4434+
vec![
4435+
Token::make_keyword("select"),
4436+
Token::Whitespace(Whitespace::Space),
4437+
Token::make_word("a", None),
4438+
Token::Whitespace(Whitespace::Space),
4439+
Token::Lt,
4440+
Token::Minus,
4441+
Token::Number("50".to_string(), false),
4442+
],
4443+
);
4444+
all_dialects().tokenizes_to(
4445+
"select a <+50",
4446+
vec![
4447+
Token::make_keyword("select"),
4448+
Token::Whitespace(Whitespace::Space),
4449+
Token::make_word("a", None),
4450+
Token::Whitespace(Whitespace::Space),
4451+
Token::Lt,
4452+
Token::Plus,
4453+
Token::Number("50".to_string(), false),
4454+
],
4455+
);
4456+
all_dialects().tokenizes_to(
4457+
"select a <=-50",
4458+
vec![
4459+
Token::make_keyword("select"),
4460+
Token::Whitespace(Whitespace::Space),
4461+
Token::make_word("a", None),
4462+
Token::Whitespace(Whitespace::Space),
4463+
Token::LtEq,
4464+
Token::Minus,
4465+
Token::Number("50".to_string(), false),
4466+
],
4467+
);
4468+
all_dialects().tokenizes_to(
4469+
"select a <=+50",
4470+
vec![
4471+
Token::make_keyword("select"),
4472+
Token::Whitespace(Whitespace::Space),
4473+
Token::make_word("a", None),
4474+
Token::Whitespace(Whitespace::Space),
4475+
Token::LtEq,
4476+
Token::Plus,
4477+
Token::Number("50".to_string(), false),
4478+
],
4479+
);
4480+
all_dialects_where(|d| d.supports_geometric_types()).tokenizes_to(
4481+
"select a <->b",
4482+
vec![
4483+
Token::make_keyword("select"),
4484+
Token::Whitespace(Whitespace::Space),
4485+
Token::make_word("a", None),
4486+
Token::Whitespace(Whitespace::Space),
4487+
Token::TwoWayArrow,
4488+
Token::make_word("b", None),
4489+
],
4490+
);
4491+
4492+
all_dialects().tokenizes_to(
4493+
"select a <-b",
4494+
vec![
4495+
Token::make_keyword("select"),
4496+
Token::Whitespace(Whitespace::Space),
4497+
Token::make_word("a", None),
4498+
Token::Whitespace(Whitespace::Space),
4499+
Token::Lt,
4500+
Token::Minus,
4501+
Token::make_word("b", None),
4502+
],
4503+
);
4504+
all_dialects().tokenizes_to(
4505+
"select a <+b",
4506+
vec![
4507+
Token::make_keyword("select"),
4508+
Token::Whitespace(Whitespace::Space),
4509+
Token::make_word("a", None),
4510+
Token::Whitespace(Whitespace::Space),
4511+
Token::Lt,
4512+
Token::Plus,
4513+
Token::make_word("b", None),
4514+
],
4515+
);
4516+
}
44234517
}

0 commit comments

Comments
 (0)