Lexer: basic chars & lifetimes

This commit is contained in:
Aleksey Kladov 2017-12-31 10:41:42 +03:00
parent 492f6e6b1c
commit d6a922459e
5 changed files with 56 additions and 2 deletions

View file

@ -32,5 +32,7 @@ Grammar(
"FAT_ARROW", "FAT_ARROW",
"NEQ", "NEQ",
"NOT", "NOT",
"CHAR",
"LIFETIME",
] ]
) )

View file

@ -34,7 +34,9 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
} }
if is_dec_digit(c) { if is_dec_digit(c) {
return scan_number(c, ptr); let kind = scan_number(c, ptr);
scan_literal_suffix(ptr);
return kind;
} }
// One-byte tokens. // One-byte tokens.
@ -98,6 +100,8 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
} }
_ => NOT, _ => NOT,
}, },
// '\'' => scan_char_or_lifetime(ptr),
_ => (), _ => (),
} }
ERROR ERROR
@ -116,6 +120,45 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
IDENT IDENT
} }
/// Classifies the token that follows a single quote as either a character
/// literal (`'a'`) or a lifetime name (`'abc`).
///
/// Returns `LIFETIME` only when an identifier-start character is followed by
/// something other than a quote and the identifier run is not terminated by a
/// quote; every other outcome, including malformed input, yields `CHAR`.
///
/// NOTE(review): presumably the opening quote has already been consumed by the
/// caller — the only visible call site is still commented out, so confirm.
fn scan_char_or_lifetime(ptr: &mut Ptr) -> SyntaxKind {
    let first = match ptr.bump() {
        // Nothing after the quote at all.
        None => return CHAR, // TODO: error reporting is upper in the stack
        Some(ch) => ch,
    };

    // An identifier start that is NOT immediately followed by a closing quote
    // begins a lifetime name; anything else is handled as a character literal.
    let looks_like_lifetime = is_ident_start(first) && !ptr.next_is('\'');

    if !looks_like_lifetime {
        scan_char_or_byte(ptr);
        if ptr.next_is('\'') {
            // Closing quote found: consume it and any literal suffix.
            ptr.bump();
            scan_literal_suffix(ptr);
        }
        // Reached with or without a closing quote.
        // TODO: error reporting for the missing-quote case
        return CHAR;
    }

    // Swallow the remaining identifier characters of the name.
    while ptr.next_is_p(is_ident_continue) {
        ptr.bump();
    }

    // A lifetime must not end with a single quote; if one follows, the input
    // is really an (invalid) character literal, so consume the quote too.
    if ptr.next_is('\'') {
        ptr.bump();
        CHAR
    } else {
        LIFETIME
    }
}
/// Placeholder for scanning a literal suffix.
///
/// The body is currently empty, so calling this leaves `ptr` untouched. It is
/// invoked after a number has been scanned and after a closed character
/// literal.
/// NOTE(review): presumably meant to consume suffixes such as `u8`/`i32` once
/// implemented — confirm the intended grammar.
fn scan_literal_suffix(ptr: &mut Ptr) {
}
/// Advances `ptr` with a single `bump()` call and discards the result.
///
/// No escape-sequence handling is visible here.
/// NOTE(review): looks like a stub for scanning the body of a char/byte
/// literal — confirm before relying on it for escapes.
fn scan_char_or_byte(ptr: &mut Ptr) {
// The value returned by `bump()` is intentionally ignored.
ptr.bump();
}
fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
match (c, c1, c2) { match (c, c1, c2) {
('r', Some('"'), _) | ('r', Some('"'), _) |

View file

@ -34,6 +34,10 @@ impl<'s> Ptr<'s> {
self.nnext() == Some(c) self.nnext() == Some(c)
} }
/// Reports whether `self.next()` yields a character for which the predicate
/// `p` returns `true`.
///
/// Returns `false` when `self.next()` is `None`; the predicate is not called
/// in that case.
pub fn next_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
    self.next().map_or(false, p)
}
pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool { pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
self.nnext().map(p) == Some(true) self.nnext().map(p) == Some(true)
} }

View file

@ -33,8 +33,10 @@ pub const EQEQ: SyntaxKind = SyntaxKind(28);
pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); pub const FAT_ARROW: SyntaxKind = SyntaxKind(29);
pub const NEQ: SyntaxKind = SyntaxKind(30); pub const NEQ: SyntaxKind = SyntaxKind(30);
pub const NOT: SyntaxKind = SyntaxKind(31); pub const NOT: SyntaxKind = SyntaxKind(31);
pub const CHAR: SyntaxKind = SyntaxKind(32);
pub const LIFETIME: SyntaxKind = SyntaxKind(33);
static INFOS: [SyntaxInfo; 32] = [ static INFOS: [SyntaxInfo; 34] = [
SyntaxInfo { name: "ERROR" }, SyntaxInfo { name: "ERROR" },
SyntaxInfo { name: "IDENT" }, SyntaxInfo { name: "IDENT" },
SyntaxInfo { name: "UNDERSCORE" }, SyntaxInfo { name: "UNDERSCORE" },
@ -67,6 +69,8 @@ static INFOS: [SyntaxInfo; 32] = [
SyntaxInfo { name: "FAT_ARROW" }, SyntaxInfo { name: "FAT_ARROW" },
SyntaxInfo { name: "NEQ" }, SyntaxInfo { name: "NEQ" },
SyntaxInfo { name: "NOT" }, SyntaxInfo { name: "NOT" },
SyntaxInfo { name: "CHAR" },
SyntaxInfo { name: "LIFETIME" },
]; ];
pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {

View file

@ -4,4 +4,5 @@ Fixmes:
* Add more thorough tests for idents for XID_Start & XID_Continue * Add more thorough tests for idents for XID_Start & XID_Continue
* Validate that float and integer literals use digits only of the appropriate * Validate that float and integer literals use digits only of the appropriate
base, and are in range base, and are in range
* Validation for unclosed char literal