mirror of
https://github.com/rust-lang/rust-analyzer
synced 2024-12-25 04:23:25 +00:00
Lexer: start numbers
This commit is contained in:
parent
8103772a10
commit
ddc637c161
8 changed files with 176 additions and 12 deletions
|
@ -4,5 +4,7 @@ Grammar(
|
|||
"IDENT",
|
||||
"UNDERSCORE",
|
||||
"WHITESPACE",
|
||||
"INT_NUMBER",
|
||||
"FLOAT_NUMBER",
|
||||
]
|
||||
)
|
|
@ -20,3 +20,7 @@ pub fn is_whitespace(c: char) -> bool {
|
|||
//https://github.com/behnam/rust-unic/issues/192
|
||||
c.is_whitespace()
|
||||
}
|
||||
|
||||
/// Returns `true` when `c` is one of the ASCII decimal digits `'0'` through `'9'`.
pub fn is_dec_digit(c: char) -> bool {
    c.is_ascii_digit()
}
|
||||
|
|
|
@ -22,16 +22,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
|
|||
// They are not identifiers, and are handled further down.
|
||||
let ident_start = is_ident_start(c) && !string_literal_start(c, ptr.next(), ptr.nnext());
|
||||
if ident_start {
|
||||
let is_single_letter = match ptr.next() {
|
||||
None => true,
|
||||
Some(c) if !is_ident_continue(c) => true,
|
||||
_ => false,
|
||||
};
|
||||
if is_single_letter {
|
||||
return if c == '_' { UNDERSCORE } else { IDENT };
|
||||
}
|
||||
ptr.bump_while(is_ident_continue);
|
||||
return IDENT;
|
||||
return scan_ident(c, ptr);
|
||||
}
|
||||
|
||||
if is_whitespace(c) {
|
||||
|
@ -39,9 +30,89 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
|
|||
return WHITESPACE;
|
||||
}
|
||||
|
||||
if is_dec_digit(c) {
|
||||
return scan_number(c, ptr);
|
||||
}
|
||||
|
||||
ERROR
|
||||
}
|
||||
|
||||
fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
|
||||
let is_single_letter = match ptr.next() {
|
||||
None => true,
|
||||
Some(c) if !is_ident_continue(c) => true,
|
||||
_ => false,
|
||||
};
|
||||
if is_single_letter {
|
||||
return if c == '_' { UNDERSCORE } else { IDENT };
|
||||
}
|
||||
ptr.bump_while(is_ident_continue);
|
||||
IDENT
|
||||
}
|
||||
|
||||
fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
|
||||
if c == '0' {
|
||||
match ptr.next().unwrap_or('\0') {
|
||||
'b' | 'o' => {
|
||||
ptr.bump();
|
||||
scan_digits(ptr, false);
|
||||
}
|
||||
'x' => {
|
||||
ptr.bump();
|
||||
scan_digits(ptr, true);
|
||||
}
|
||||
'0'...'9' | '_' | '.' | 'e' | 'E' => {
|
||||
scan_digits(ptr, true);
|
||||
}
|
||||
_ => return INT_NUMBER,
|
||||
}
|
||||
} else {
|
||||
scan_digits(ptr, false);
|
||||
}
|
||||
|
||||
// might be a float, but don't be greedy if this is actually an
|
||||
// integer literal followed by field/method access or a range pattern
|
||||
// (`0..2` and `12.foo()`)
|
||||
if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) {
|
||||
// might have stuff after the ., and if it does, it needs to start
|
||||
// with a number
|
||||
ptr.bump();
|
||||
scan_digits(ptr, false);
|
||||
scan_float_exponent(ptr);
|
||||
return FLOAT_NUMBER;
|
||||
}
|
||||
// it might be a float if it has an exponent
|
||||
if ptr.next_is('e') || ptr.next_is('E') {
|
||||
scan_float_exponent(ptr);
|
||||
return FLOAT_NUMBER;
|
||||
}
|
||||
INT_NUMBER
|
||||
}
|
||||
|
||||
fn scan_digits(ptr: &mut Ptr, allow_hex: bool) {
|
||||
while let Some(c) = ptr.next() {
|
||||
match c {
|
||||
'_' | '0'...'9' => {
|
||||
ptr.bump();
|
||||
}
|
||||
'a'...'f' | 'A' ... 'F' if allow_hex => {
|
||||
ptr.bump();
|
||||
}
|
||||
_ => return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_float_exponent(ptr: &mut Ptr) {
|
||||
if ptr.next_is('e') || ptr.next_is('E') {
|
||||
ptr.bump();
|
||||
if ptr.next_is('-') || ptr.next_is('+') {
|
||||
ptr.bump();
|
||||
}
|
||||
scan_digits(ptr, false);
|
||||
}
|
||||
}
|
||||
|
||||
fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
|
||||
match (c, c1, c2) {
|
||||
('r', Some('"'), _) |
|
||||
|
|
|
@ -26,6 +26,18 @@ impl<'s> Ptr<'s> {
|
|||
chars.next()
|
||||
}
|
||||
|
||||
pub fn next_is(&self, c: char) -> bool {
|
||||
self.next() == Some(c)
|
||||
}
|
||||
|
||||
pub fn nnext_is(&self, c: char) -> bool {
|
||||
self.nnext() == Some(c)
|
||||
}
|
||||
|
||||
pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
|
||||
self.nnext().map(p) == Some(true)
|
||||
}
|
||||
|
||||
pub fn bump(&mut self) -> Option<char> {
|
||||
let ch = self.chars().next()?;
|
||||
self.len += TextUnit::len_of_char(ch);
|
||||
|
|
|
@ -5,12 +5,16 @@ pub const ERROR: SyntaxKind = SyntaxKind(0);
|
|||
pub const IDENT: SyntaxKind = SyntaxKind(1);
|
||||
pub const UNDERSCORE: SyntaxKind = SyntaxKind(2);
|
||||
pub const WHITESPACE: SyntaxKind = SyntaxKind(3);
|
||||
pub const INT_NUMBER: SyntaxKind = SyntaxKind(4);
|
||||
pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5);
|
||||
|
||||
static INFOS: [SyntaxInfo; 4] = [
|
||||
static INFOS: [SyntaxInfo; 6] = [
|
||||
SyntaxInfo { name: "ERROR" },
|
||||
SyntaxInfo { name: "IDENT" },
|
||||
SyntaxInfo { name: "UNDERSCORE" },
|
||||
SyntaxInfo { name: "WHITESPACE" },
|
||||
SyntaxInfo { name: "INT_NUMBER" },
|
||||
SyntaxInfo { name: "FLOAT_NUMBER" },
|
||||
];
|
||||
|
||||
pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
|
||||
|
|
7
tests/data/lexer/0004_number.rs
Normal file
7
tests/data/lexer/0004_number.rs
Normal file
|
@ -0,0 +1,7 @@
|
|||
0 0b 0o 0x 00 0_ 0. 0e 0E 0z
|
||||
01790 0b1790 0o1790 0x1790aAbBcCdDeEfF 001279 0_1279 0.1279 0e1279 0E1279
|
||||
0..2
|
||||
0.foo()
|
||||
0e+1
|
||||
0.e+1
|
||||
0.0E-2
|
62
tests/data/lexer/0004_number.txt
Normal file
62
tests/data/lexer/0004_number.txt
Normal file
|
@ -0,0 +1,62 @@
|
|||
INT_NUMBER 1
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 2
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 2
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 2
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 2
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 2
|
||||
WHITESPACE 1
|
||||
FLOAT_NUMBER 2
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 2
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 2
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 1
|
||||
IDENT 1
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 5
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 6
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 6
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 18
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 6
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 6
|
||||
WHITESPACE 1
|
||||
FLOAT_NUMBER 6
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 6
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 6
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 1
|
||||
ERROR 1
|
||||
ERROR 1
|
||||
INT_NUMBER 1
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 1
|
||||
ERROR 1
|
||||
IDENT 3
|
||||
ERROR 1
|
||||
ERROR 1
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 2
|
||||
ERROR 1
|
||||
INT_NUMBER 1
|
||||
WHITESPACE 1
|
||||
INT_NUMBER 1
|
||||
ERROR 1
|
||||
IDENT 1
|
||||
ERROR 1
|
||||
INT_NUMBER 1
|
||||
WHITESPACE 1
|
||||
FLOAT_NUMBER 6
|
||||
WHITESPACE 1
|
|
@ -1,5 +1,7 @@
|
|||
Fixmes:
|
||||
|
||||
* Fix `is_whitespace`, add more test
|
||||
* Fix `is_whitespace`, add more tests
|
||||
* Add more thorough tests for idents for XID_Start & XID_Continue
|
||||
* Validate that float and integer literals use only digits of the appropriate
|
||||
base, and are in range
|
||||
|
||||
|
|
Loading…
Reference in a new issue