mirror of
https://github.com/rust-lang/rust-analyzer
synced 2024-12-25 12:33:33 +00:00
Cleanup character classes
This commit is contained in:
parent
171baf4c48
commit
770ecd4ecd
8 changed files with 73 additions and 31 deletions
|
@ -1,6 +1,8 @@
|
|||
Grammar(
|
||||
syntax_kinds: [
|
||||
"ERROR",
|
||||
"IDENT",
|
||||
"UNDERSCORE",
|
||||
"WHITESPACE",
|
||||
]
|
||||
)
|
22
src/lexer/classes.rs
Normal file
22
src/lexer/classes.rs
Normal file
|
@ -0,0 +1,22 @@
|
|||
use unicode_xid::UnicodeXID;
|
||||
|
||||
pub fn is_ident_start(c: char) -> bool {
|
||||
(c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| c == '_'
|
||||
|| (c > '\x7f' && UnicodeXID::is_xid_start(c))
|
||||
}
|
||||
|
||||
pub fn is_ident_continue(c: char) -> bool {
|
||||
(c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| (c >= '0' && c <= '9')
|
||||
|| c == '_'
|
||||
|| (c > '\x7f' && UnicodeXID::is_xid_continue(c))
|
||||
}
|
||||
|
||||
/// Reports whether `c` is whitespace as far as the lexer is concerned.
///
/// This is the Unicode `Pattern_White_Space` set (UAX #31), which is a small,
/// stable list of code points. It deliberately differs from
/// `char::is_whitespace`, which implements the broader `White_Space`
/// property: that one accepts characters such as U+00A0 NO-BREAK SPACE and
/// U+3000 IDEOGRAPHIC SPACE, and rejects the two directional marks below.
pub fn is_whitespace(c: char) -> bool {
    match c {
        // ASCII: tab, line feed, vertical tab, form feed, carriage return, space.
        '\u{0009}' | '\u{000A}' | '\u{000B}' | '\u{000C}' | '\u{000D}' | '\u{0020}' => true,
        // NEXT LINE, from the Latin-1 control block.
        '\u{0085}' => true,
        // LEFT-TO-RIGHT MARK and RIGHT-TO-LEFT MARK.
        '\u{200E}' | '\u{200F}' => true,
        // LINE SEPARATOR and PARAGRAPH SEPARATOR.
        '\u{2028}' | '\u{2029}' => true,
        _ => false,
    }
}
|
|
@ -1,11 +1,12 @@
|
|||
use unicode_xid::UnicodeXID;
|
||||
|
||||
use {Token, SyntaxKind};
|
||||
use syntax_kinds::*;
|
||||
|
||||
mod ptr;
|
||||
use self::ptr::Ptr;
|
||||
|
||||
mod classes;
|
||||
use self::classes::*;
|
||||
|
||||
pub fn next_token(text: &str) -> Token {
|
||||
assert!(!text.is_empty());
|
||||
let mut ptr = Ptr::new(text);
|
||||
|
@ -19,38 +20,20 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
|
|||
// Note: r as in r" or r#" is part of a raw string literal,
|
||||
// b as in b' is part of a byte literal.
|
||||
// They are not identifiers, and are handled further down.
|
||||
let ident_start = ident_start(c) && !string_literal_start(c, ptr.next(), ptr.nnext());
|
||||
let ident_start = is_ident_start(c) && !string_literal_start(c, ptr.next(), ptr.nnext());
|
||||
if ident_start {
|
||||
loop {
|
||||
match ptr.next() {
|
||||
Some(c) if ident_continue(c) => {
|
||||
ptr.bump();
|
||||
},
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
IDENT
|
||||
} else {
|
||||
WHITESPACE
|
||||
ptr.bump_while(is_ident_continue);
|
||||
return IDENT;
|
||||
}
|
||||
}
|
||||
|
||||
fn ident_start(c: char) -> bool {
|
||||
(c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| c == '_'
|
||||
|| (c > '\x7f' && UnicodeXID::is_xid_start(c))
|
||||
}
|
||||
if is_whitespace(c) {
|
||||
ptr.bump_while(is_whitespace);
|
||||
return WHITESPACE;
|
||||
}
|
||||
|
||||
fn ident_continue(c: char) -> bool {
|
||||
(c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| (c >= '0' && c <= '9')
|
||||
|| c == '_'
|
||||
|| (c > '\x7f' && UnicodeXID::is_xid_continue(c))
|
||||
return ERROR
|
||||
}
|
||||
|
||||
|
||||
fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
|
||||
match (c, c1, c2) {
|
||||
('r', Some('"'), _) |
|
||||
|
|
|
@ -32,6 +32,17 @@ impl<'s> Ptr<'s> {
|
|||
Some(ch)
|
||||
}
|
||||
|
||||
pub fn bump_while<F: Fn(char) -> bool>(&mut self, pred: F) {
|
||||
loop {
|
||||
match self.next() {
|
||||
Some(c) if pred(c) => {
|
||||
self.bump();
|
||||
},
|
||||
_ => return,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn chars(&self) -> Chars {
|
||||
self.text[self.len.0 as usize ..].chars()
|
||||
}
|
||||
|
|
|
@ -1,11 +1,15 @@
|
|||
// Generated from grammar.ron
|
||||
use tree::{SyntaxKind, SyntaxInfo};
|
||||
|
||||
pub const IDENT: SyntaxKind = SyntaxKind(0);
|
||||
pub const WHITESPACE: SyntaxKind = SyntaxKind(1);
|
||||
pub const ERROR: SyntaxKind = SyntaxKind(0);
|
||||
pub const IDENT: SyntaxKind = SyntaxKind(1);
|
||||
pub const UNDERSCORE: SyntaxKind = SyntaxKind(2);
|
||||
pub const WHITESPACE: SyntaxKind = SyntaxKind(3);
|
||||
|
||||
static INFOS: [SyntaxInfo; 2] = [
|
||||
static INFOS: [SyntaxInfo; 4] = [
|
||||
SyntaxInfo { name: "ERROR" },
|
||||
SyntaxInfo { name: "IDENT" },
|
||||
SyntaxInfo { name: "UNDERSCORE" },
|
||||
SyntaxInfo { name: "WHITESPACE" },
|
||||
];
|
||||
|
||||
|
|
4
tests/data/lexer/0002_whitespace.rs
Normal file
4
tests/data/lexer/0002_whitespace.rs
Normal file
|
@ -0,0 +1,4 @@
|
|||
a b c
|
||||
d
|
||||
|
||||
e f
|
12
tests/data/lexer/0002_whitespace.txt
Normal file
12
tests/data/lexer/0002_whitespace.txt
Normal file
|
@ -0,0 +1,12 @@
|
|||
IDENT 1
|
||||
WHITESPACE 1
|
||||
IDENT 1
|
||||
WHITESPACE 2
|
||||
IDENT 1
|
||||
WHITESPACE 1
|
||||
IDENT 1
|
||||
WHITESPACE 2
|
||||
IDENT 1
|
||||
WHITESPACE 1
|
||||
IDENT 1
|
||||
WHITESPACE 1
|
4
validation.md
Normal file
4
validation.md
Normal file
|
@ -0,0 +1,4 @@
|
|||
Fixmes:
|
||||
|
||||
* Fix `is_whitespace`, add more tests
|
||||
|
Loading…
Reference in a new issue