Lexer: byte strings

This commit is contained in:
Aleksey Kladov 2017-12-31 14:02:55 +03:00
parent f1a840cc38
commit 2f24fb4f2c
6 changed files with 55 additions and 11 deletions

View file

@ -32,7 +32,12 @@ Grammar(
"FAT_ARROW", "FAT_ARROW",
"NEQ", "NEQ",
"NOT", "NOT",
"CHAR",
"LIFETIME", "LIFETIME",
"CHAR",
"BYTE",
"STRING",
"RAW_STRING",
"BYTE_STRING",
"RAW_BYTE_STRING",
] ]
) )

View file

@ -33,30 +33,51 @@ pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind {
match c { match c {
'\'' => { '\'' => {
scan_byte(ptr); scan_byte(ptr);
CHAR BYTE
} }
'"' => { '"' => {
scan_byte_string(ptr); scan_byte_string(ptr);
CHAR BYTE_STRING
} }
'r' => { 'r' => {
scan_raw_byte_string(ptr); scan_raw_byte_string(ptr);
CHAR RAW_BYTE_STRING
} }
_ => unreachable!(), _ => unreachable!(),
} }
} }
fn scan_byte(ptr: &mut Ptr) { fn scan_byte(ptr: &mut Ptr) {
if ptr.next_is('\'') {
ptr.bump();
return
}
ptr.bump();
if ptr.next_is('\'') {
ptr.bump();
return
}
} }
fn scan_byte_string(ptr: &mut Ptr) { fn scan_byte_string(ptr: &mut Ptr) {
while let Some(c) = ptr.bump() {
if c == '"' {
return
}
}
} }
fn scan_raw_byte_string(ptr: &mut Ptr) { fn scan_raw_byte_string(ptr: &mut Ptr) {
if !ptr.next_is('"') {
return
}
ptr.bump();
while let Some(c) = ptr.bump() {
if c == '"' {
return
}
}
} }
fn scan_char_or_byte(ptr: &mut Ptr) { fn scan_char_or_byte(ptr: &mut Ptr) {

View file

@ -33,10 +33,15 @@ pub const EQEQ: SyntaxKind = SyntaxKind(28);
pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); pub const FAT_ARROW: SyntaxKind = SyntaxKind(29);
pub const NEQ: SyntaxKind = SyntaxKind(30); pub const NEQ: SyntaxKind = SyntaxKind(30);
pub const NOT: SyntaxKind = SyntaxKind(31); pub const NOT: SyntaxKind = SyntaxKind(31);
pub const CHAR: SyntaxKind = SyntaxKind(32); pub const LIFETIME: SyntaxKind = SyntaxKind(32);
pub const LIFETIME: SyntaxKind = SyntaxKind(33); pub const CHAR: SyntaxKind = SyntaxKind(33);
pub const BYTE: SyntaxKind = SyntaxKind(34);
pub const STRING: SyntaxKind = SyntaxKind(35);
pub const RAW_STRING: SyntaxKind = SyntaxKind(36);
pub const BYTE_STRING: SyntaxKind = SyntaxKind(37);
pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(38);
static INFOS: [SyntaxInfo; 34] = [ static INFOS: [SyntaxInfo; 39] = [
SyntaxInfo { name: "ERROR" }, SyntaxInfo { name: "ERROR" },
SyntaxInfo { name: "IDENT" }, SyntaxInfo { name: "IDENT" },
SyntaxInfo { name: "UNDERSCORE" }, SyntaxInfo { name: "UNDERSCORE" },
@ -69,8 +74,13 @@ static INFOS: [SyntaxInfo; 34] = [
SyntaxInfo { name: "FAT_ARROW" }, SyntaxInfo { name: "FAT_ARROW" },
SyntaxInfo { name: "NEQ" }, SyntaxInfo { name: "NEQ" },
SyntaxInfo { name: "NOT" }, SyntaxInfo { name: "NOT" },
SyntaxInfo { name: "CHAR" },
SyntaxInfo { name: "LIFETIME" }, SyntaxInfo { name: "LIFETIME" },
SyntaxInfo { name: "CHAR" },
SyntaxInfo { name: "BYTE" },
SyntaxInfo { name: "STRING" },
SyntaxInfo { name: "RAW_STRING" },
SyntaxInfo { name: "BYTE_STRING" },
SyntaxInfo { name: "RAW_BYTE_STRING" },
]; ];
pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {

View file

@ -0,0 +1 @@
b'' b'x' b"foo" br""

View file

@ -0,0 +1,7 @@
BYTE 3 "b\'\'"
WHITESPACE 1 " "
BYTE 4 "b\'x\'"
WHITESPACE 1 " "
BYTE_STRING 6 "b\"foo\""
WHITESPACE 1 " "
RAW_BYTE_STRING 4 "br\"\""

View file

@ -5,4 +5,4 @@ Fixmes:
* Validate that float and integer literals use digits only of the appropriate * Validate that float and integer literals use digits only of the appropriate
base, and are in range base, and are in range
* Validation for unclosed char literal * Validation for unclosed char literal
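* String literal lexing is still largely wrong: the byte and byte-string scanners ignore escape sequences (e.g. `b'\''`, `b"\""`) and raw byte strings do not support `#` delimiters (e.g. `br#"..."#`). Needs more tests and comparison with libsyntax.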