mirror of
https://github.com/rust-lang/rust-analyzer
synced 2024-12-26 04:53:34 +00:00
Lexer: extract string lexing to a separate file
This commit is contained in:
parent
9d5138bf11
commit
f1a840cc38
2 changed files with 88 additions and 49 deletions
|
@ -10,6 +10,9 @@ use self::classes::*;
|
||||||
mod numbers;
|
mod numbers;
|
||||||
use self::numbers::scan_number;
|
use self::numbers::scan_number;
|
||||||
|
|
||||||
|
mod strings;
|
||||||
|
use self::strings::{string_literal_start, scan_char, scan_byte_char_or_string};
|
||||||
|
|
||||||
pub fn next_token(text: &str) -> Token {
|
pub fn next_token(text: &str) -> Token {
|
||||||
assert!(!text.is_empty());
|
assert!(!text.is_empty());
|
||||||
let mut ptr = Ptr::new(text);
|
let mut ptr = Ptr::new(text);
|
||||||
|
@ -101,7 +104,26 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
|
||||||
_ => NOT,
|
_ => NOT,
|
||||||
},
|
},
|
||||||
|
|
||||||
'\'' => return scan_char_or_lifetime(ptr),
|
// If the character is an ident start not followed by another single
|
||||||
|
// quote, then this is a lifetime name:
|
||||||
|
'\'' => return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') {
|
||||||
|
ptr.bump();
|
||||||
|
while ptr.next_is_p(is_ident_continue) {
|
||||||
|
ptr.bump();
|
||||||
|
}
|
||||||
|
// lifetimes shouldn't end with a single quote
|
||||||
|
// if we find one, then this is an invalid character literal
|
||||||
|
if ptr.next_is('\'') {
|
||||||
|
ptr.bump();
|
||||||
|
return CHAR; // TODO: error reporting
|
||||||
|
}
|
||||||
|
LIFETIME
|
||||||
|
} else {
|
||||||
|
scan_char(ptr);
|
||||||
|
scan_literal_suffix(ptr);
|
||||||
|
CHAR
|
||||||
|
},
|
||||||
|
'b' => return scan_byte_char_or_string(ptr),
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
ERROR
|
ERROR
|
||||||
|
@ -120,57 +142,9 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
|
||||||
IDENT
|
IDENT
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_char_or_lifetime(ptr: &mut Ptr) -> SyntaxKind {
|
|
||||||
// Either a character constant 'a' OR a lifetime name 'abc
|
|
||||||
let c = match ptr.bump() {
|
|
||||||
Some(c) => c,
|
|
||||||
None => return CHAR, // TODO: error reporting is upper in the stack
|
|
||||||
};
|
|
||||||
|
|
||||||
// If the character is an ident start not followed by another single
|
|
||||||
// quote, then this is a lifetime name:
|
|
||||||
if is_ident_start(c) && !ptr.next_is('\'') {
|
|
||||||
while ptr.next_is_p(is_ident_continue) {
|
|
||||||
ptr.bump();
|
|
||||||
}
|
|
||||||
|
|
||||||
// lifetimes shouldn't end with a single quote
|
|
||||||
// if we find one, then this is an invalid character literal
|
|
||||||
if ptr.next_is('\'') {
|
|
||||||
ptr.bump();
|
|
||||||
return CHAR;
|
|
||||||
}
|
|
||||||
return LIFETIME;
|
|
||||||
}
|
|
||||||
scan_char_or_byte(ptr);
|
|
||||||
if !ptr.next_is('\'') {
|
|
||||||
return CHAR; // TODO: error reporting
|
|
||||||
}
|
|
||||||
ptr.bump();
|
|
||||||
scan_literal_suffix(ptr);
|
|
||||||
CHAR
|
|
||||||
}
|
|
||||||
|
|
||||||
fn scan_literal_suffix(ptr: &mut Ptr) {
|
fn scan_literal_suffix(ptr: &mut Ptr) {
|
||||||
if ptr.next_is_p(is_ident_start) {
|
if ptr.next_is_p(is_ident_start) {
|
||||||
ptr.bump();
|
ptr.bump();
|
||||||
}
|
}
|
||||||
ptr.bump_while(is_ident_continue);
|
ptr.bump_while(is_ident_continue);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_char_or_byte(ptr: &mut Ptr) {
|
|
||||||
//FIXME: deal with escape sequencies
|
|
||||||
ptr.bump();
|
|
||||||
}
|
|
||||||
|
|
||||||
fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
|
|
||||||
match (c, c1, c2) {
|
|
||||||
('r', Some('"'), _) |
|
|
||||||
('r', Some('#'), _) |
|
|
||||||
('b', Some('"'), _) |
|
|
||||||
('b', Some('\''), _) |
|
|
||||||
('b', Some('r'), Some('"')) |
|
|
||||||
('b', Some('r'), Some('#')) => true,
|
|
||||||
_ => false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
65
src/lexer/strings.rs
Normal file
65
src/lexer/strings.rs
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
use {SyntaxKind};
|
||||||
|
use syntax_kinds::*;
|
||||||
|
|
||||||
|
use lexer::ptr::Ptr;
|
||||||
|
|
||||||
|
/// Reports whether `c`, followed by `c1` and then `c2`, opens a prefixed
/// string-like literal rather than an ordinary identifier.
///
/// The accepted openers are:
///
/// * `r"` and `r#`
/// * `b"` and `b'`
/// * `br"` and `br#`
///
/// `c1` and `c2` are the two characters after `c`; pass `None` for a position
/// that lies past the end of the input. `c2` is only inspected for the `br`
/// prefix.
pub(crate) fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
    match (c, c1, c2) {
        ('r', Some('"'), _)
        | ('r', Some('#'), _)
        | ('b', Some('"'), _)
        | ('b', Some('\''), _)
        | ('b', Some('r'), Some('"'))
        | ('b', Some('r'), Some('#')) => true,
        _ => false,
    }
}
|
||||||
|
|
||||||
|
pub(crate) fn scan_char(ptr: &mut Ptr) {
|
||||||
|
if ptr.bump().is_none() {
|
||||||
|
return; // TODO: error reporting is upper in the stack
|
||||||
|
}
|
||||||
|
scan_char_or_byte(ptr);
|
||||||
|
if !ptr.next_is('\'') {
|
||||||
|
return; // TODO: error reporting
|
||||||
|
}
|
||||||
|
ptr.bump();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind {
|
||||||
|
// unwrapping and not-exhaustive match are ok
|
||||||
|
// because of string_literal_start
|
||||||
|
let c = ptr.bump().unwrap();
|
||||||
|
match c {
|
||||||
|
'\'' => {
|
||||||
|
scan_byte(ptr);
|
||||||
|
CHAR
|
||||||
|
}
|
||||||
|
'"' => {
|
||||||
|
scan_byte_string(ptr);
|
||||||
|
CHAR
|
||||||
|
}
|
||||||
|
'r' => {
|
||||||
|
scan_raw_byte_string(ptr);
|
||||||
|
CHAR
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn scan_byte(ptr: &mut Ptr) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
fn scan_byte_string(ptr: &mut Ptr) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
fn scan_raw_byte_string(ptr: &mut Ptr) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
fn scan_char_or_byte(ptr: &mut Ptr) {
|
||||||
|
//FIXME: deal with escape sequencies
|
||||||
|
ptr.bump();
|
||||||
|
}
|
Loading…
Reference in a new issue