Lexer: basic chars & lifetimes

This commit is contained in:
Aleksey Kladov 2017-12-31 10:41:42 +03:00
parent 492f6e6b1c
commit d6a922459e
5 changed files with 56 additions and 2 deletions

View file

@ -32,5 +32,7 @@ Grammar(
"FAT_ARROW",
"NEQ",
"NOT",
"CHAR",
"LIFETIME",
]
)

View file

@ -34,7 +34,9 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
}
if is_dec_digit(c) {
return scan_number(c, ptr);
let kind = scan_number(c, ptr);
scan_literal_suffix(ptr);
return kind;
}
// One-byte tokens.
@ -98,6 +100,8 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
}
_ => NOT,
},
// '\'' => scan_char_or_lifetime(ptr),
_ => (),
}
ERROR
@ -116,6 +120,45 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
IDENT
}
/// Decides whether the input at `ptr` is a character literal (`'a'`) or a
/// lifetime name (`'abc`), consuming characters along the way, and returns
/// `CHAR` or `LIFETIME` accordingly.
///
/// NOTE(review): presumably this runs right after the opening `'` has been
/// consumed by the caller (the only visible call site is commented out) --
/// confirm once it is wired in.
fn scan_char_or_lifetime(ptr: &mut Ptr) -> SyntaxKind {
    let first = match ptr.bump() {
        // Input ended: still classified as CHAR.
        // TODO: error reporting is upper in the stack
        None => return CHAR,
        Some(ch) => ch,
    };

    // An identifier start that is NOT immediately followed by a closing
    // quote begins a lifetime name; anything else is a character literal.
    let looks_like_lifetime = is_ident_start(first) && !ptr.next_is('\'');

    if !looks_like_lifetime {
        // NOTE(review): `first` was already consumed above and
        // `scan_char_or_byte` currently bumps once more -- confirm that this
        // double advance is intended.
        scan_char_or_byte(ptr);
        if ptr.next_is('\'') {
            // Closing quote present: consume it and any literal suffix.
            ptr.bump();
            scan_literal_suffix(ptr);
        }
        // A missing closing quote is still reported as CHAR.
        // TODO: error reporting
        return CHAR;
    }

    // Consume the remainder of the identifier.
    while ptr.next_is_p(is_ident_continue) {
        ptr.bump();
    }

    // A lifetime must not end with a single quote; if one follows, treat the
    // whole thing as an (invalid) character literal instead.
    if ptr.next_is('\'') {
        ptr.bump();
        CHAR
    } else {
        LIFETIME
    }
}
// Placeholder for scanning a literal suffix: the body is empty, so nothing is
// consumed from `ptr` yet.
fn scan_literal_suffix(ptr: &mut Ptr) {
}
// Stub for scanning the body of a char/byte literal: it advances `ptr` by a
// single `bump` and ignores whatever that call returns.
fn scan_char_or_byte(ptr: &mut Ptr) {
    let _ = ptr.bump();
}
fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
match (c, c1, c2) {
('r', Some('"'), _) |

View file

@ -34,6 +34,10 @@ impl<'s> Ptr<'s> {
self.nnext() == Some(c)
}
/// Returns `true` when `self.next()` yields a character and the predicate
/// `p` accepts it; returns `false` when there is no such character.
pub fn next_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
    match self.next() {
        Some(ch) => p(ch),
        None => false,
    }
}
/// Returns `true` when `self.nnext()` yields a character and the predicate
/// `p` accepts it; returns `false` when there is no such character.
pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
    match self.nnext() {
        Some(ch) => p(ch),
        None => false,
    }
}

View file

@ -33,8 +33,10 @@ pub const EQEQ: SyntaxKind = SyntaxKind(28);
pub const FAT_ARROW: SyntaxKind = SyntaxKind(29);
pub const NEQ: SyntaxKind = SyntaxKind(30);
pub const NOT: SyntaxKind = SyntaxKind(31);
pub const CHAR: SyntaxKind = SyntaxKind(32);
pub const LIFETIME: SyntaxKind = SyntaxKind(33);
static INFOS: [SyntaxInfo; 32] = [
static INFOS: [SyntaxInfo; 34] = [
SyntaxInfo { name: "ERROR" },
SyntaxInfo { name: "IDENT" },
SyntaxInfo { name: "UNDERSCORE" },
@ -67,6 +69,8 @@ static INFOS: [SyntaxInfo; 32] = [
SyntaxInfo { name: "FAT_ARROW" },
SyntaxInfo { name: "NEQ" },
SyntaxInfo { name: "NOT" },
SyntaxInfo { name: "CHAR" },
SyntaxInfo { name: "LIFETIME" },
];
pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {

View file

@ -4,4 +4,5 @@ Fixmes:
* Add more thorough tests for idents for XID_Start & XID_Continue
* Validate that float and integer literals use digits only of the appropriate
base, and are in range
* Validation for unclosed char literal