From 2f24fb4f2c5d8708533a1b0155e1e884bd4b2ba2 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 31 Dec 2017 14:02:55 +0300 Subject: [PATCH] Lexer: byte strings --- grammar.ron | 7 ++++++- src/lexer/strings.rs | 31 ++++++++++++++++++++++++++----- src/syntax_kinds.rs | 18 ++++++++++++++---- tests/data/lexer/0008_strings.rs | 1 + tests/data/lexer/0008_strings.txt | 7 +++++++ validation.md | 2 +- 6 files changed, 55 insertions(+), 11 deletions(-) create mode 100644 tests/data/lexer/0008_strings.rs create mode 100644 tests/data/lexer/0008_strings.txt diff --git a/grammar.ron b/grammar.ron index 995d71f814..c0564e9cf9 100644 --- a/grammar.ron +++ b/grammar.ron @@ -32,7 +32,12 @@ Grammar( "FAT_ARROW", "NEQ", "NOT", - "CHAR", "LIFETIME", + "CHAR", + "BYTE", + "STRING", + "RAW_STRING", + "BYTE_STRING", + "RAW_BYTE_STRING", ] ) \ No newline at end of file diff --git a/src/lexer/strings.rs b/src/lexer/strings.rs index 40e5e45281..283ce8feb3 100644 --- a/src/lexer/strings.rs +++ b/src/lexer/strings.rs @@ -33,30 +33,51 @@ pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind { match c { '\'' => { scan_byte(ptr); - CHAR + BYTE } '"' => { scan_byte_string(ptr); - CHAR + BYTE_STRING } 'r' => { scan_raw_byte_string(ptr); - CHAR + RAW_BYTE_STRING } _ => unreachable!(), } } fn scan_byte(ptr: &mut Ptr) { - + if ptr.next_is('\'') { + ptr.bump(); + return + } + ptr.bump(); + if ptr.next_is('\'') { + ptr.bump(); + return + } } fn scan_byte_string(ptr: &mut Ptr) { - + while let Some(c) = ptr.bump() { + if c == '"' { + return + } + } } fn scan_raw_byte_string(ptr: &mut Ptr) { + if !ptr.next_is('"') { + return + } + ptr.bump(); + while let Some(c) = ptr.bump() { + if c == '"' { + return + } + } } fn scan_char_or_byte(ptr: &mut Ptr) { diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs index 4c023757ba..4a68acb31f 100644 --- a/src/syntax_kinds.rs +++ b/src/syntax_kinds.rs @@ -33,10 +33,15 @@ pub const EQEQ: SyntaxKind = SyntaxKind(28); pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); pub const NEQ: SyntaxKind = SyntaxKind(30); pub const NOT: SyntaxKind = SyntaxKind(31); -pub const CHAR: SyntaxKind = SyntaxKind(32); -pub const LIFETIME: SyntaxKind = SyntaxKind(33); +pub const LIFETIME: SyntaxKind = SyntaxKind(32); +pub const CHAR: SyntaxKind = SyntaxKind(33); +pub const BYTE: SyntaxKind = SyntaxKind(34); +pub const STRING: SyntaxKind = SyntaxKind(35); +pub const RAW_STRING: SyntaxKind = SyntaxKind(36); +pub const BYTE_STRING: SyntaxKind = SyntaxKind(37); +pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(38); -static INFOS: [SyntaxInfo; 34] = [ +static INFOS: [SyntaxInfo; 39] = [ SyntaxInfo { name: "ERROR" }, SyntaxInfo { name: "IDENT" }, SyntaxInfo { name: "UNDERSCORE" }, @@ -69,8 +74,13 @@ static INFOS: [SyntaxInfo; 34] = [ SyntaxInfo { name: "FAT_ARROW" }, SyntaxInfo { name: "NEQ" }, SyntaxInfo { name: "NOT" }, - SyntaxInfo { name: "CHAR" }, SyntaxInfo { name: "LIFETIME" }, + SyntaxInfo { name: "CHAR" }, + SyntaxInfo { name: "BYTE" }, + SyntaxInfo { name: "STRING" }, + SyntaxInfo { name: "RAW_STRING" }, + SyntaxInfo { name: "BYTE_STRING" }, + SyntaxInfo { name: "RAW_BYTE_STRING" }, ]; pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { diff --git a/tests/data/lexer/0008_strings.rs b/tests/data/lexer/0008_strings.rs new file mode 100644 index 0000000000..1ffc7bb9d0 --- /dev/null +++ b/tests/data/lexer/0008_strings.rs @@ -0,0 +1 @@ +b'' b'x' b"foo" br"" \ No newline at end of file diff --git a/tests/data/lexer/0008_strings.txt b/tests/data/lexer/0008_strings.txt new file mode 100644 index 0000000000..d4ff4b5581 --- /dev/null +++ b/tests/data/lexer/0008_strings.txt @@ -0,0 +1,7 @@ +BYTE 3 "b\'\'" +WHITESPACE 1 " " +BYTE 4 "b\'x\'" +WHITESPACE 1 " " +BYTE_STRING 6 "b\"foo\"" +WHITESPACE 1 " " +RAW_BYTE_STRING 4 "br\"\"" diff --git a/validation.md b/validation.md index a38b4a96e4..39b5f85fa2 100644 --- a/validation.md +++ b/validation.md @@ -5,4 +5,4 @@ Fixmes: * Validate that float and integer literals use digits only of the appropriate base, and are in range * Validation for unclosed char literal - +* Strings are completely wrong: more tests and comparison with libsyntax.