Lexer: symbols

This commit is contained in:
Aleksey Kladov 2017-12-30 16:30:37 +03:00
parent 3e91e8b77d
commit fad3e50987
6 changed files with 144 additions and 7 deletions

View file

@ -6,5 +6,24 @@ Grammar(
"WHITESPACE", "WHITESPACE",
"INT_NUMBER", "INT_NUMBER",
"FLOAT_NUMBER", "FLOAT_NUMBER",
"SEMI",
"COMMA",
"DOT",
"DOTDOT",
"DOTDOTDOT",
"DOTDOTEQ",
"L_PAREN",
"R_PAREN",
"L_CURLY",
"R_CURLY",
"L_BRACK",
"R_BRACK",
"AT",
"POUND",
"TILDE",
"QUESTION",
"COLON",
"COLONCOLON",
"DOLLAR",
] ]
) )

View file

@ -37,6 +37,47 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
return scan_number(c, ptr); return scan_number(c, ptr);
} }
// Punctuation tokens. `c` is the character the token starts with. Most arms
// decide the kind from `c` alone; the `.` and `:` arms peek at the following
// input so that the longest matching token wins.
// NOTE(review): `ptr.bump()` presumably advances past one peeked character and
// `c` itself appears to be consumed already -- confirm against `Ptr`.
match c {
// Single-character tokens.
';' => return SEMI,
',' => return COMMA,
'(' => return L_PAREN,
')' => return R_PAREN,
'{' => return L_CURLY,
'}' => return R_CURLY,
'[' => return L_BRACK,
']' => return R_BRACK,
'@' => return AT,
'#' => return POUND,
'~' => return TILDE,
'?' => return QUESTION,
'$' => return DOLLAR,
// `.` can start `.`, `..`, `...` or `..=`: inspect the next two characters.
'.' => return match (ptr.next(), ptr.nnext()) {
// `...`: two further characters belong to the token.
(Some('.'), Some('.')) => {
ptr.bump();
ptr.bump();
DOTDOTDOT
},
// `..=`: two further characters belong to the token.
(Some('.'), Some('=')) => {
ptr.bump();
ptr.bump();
DOTDOTEQ
},
// A second `.` followed by anything else (or by nothing): plain `..`.
(Some('.'), _) => {
ptr.bump();
DOTDOT
},
// No second `.`: a lone `.`, nothing further to bump.
_ => DOT
},
// `:` is either the two-character `::` or a lone `:`.
':' => return match ptr.next() {
Some(':') => {
ptr.bump();
COLONCOLON
}
_ => COLON
},
// Not a punctuation character handled here: fall through to the code after
// this match.
_ => (),
}
ERROR ERROR
} }

View file

@ -7,14 +7,52 @@ pub const UNDERSCORE: SyntaxKind = SyntaxKind(2);
pub const WHITESPACE: SyntaxKind = SyntaxKind(3); pub const WHITESPACE: SyntaxKind = SyntaxKind(3);
pub const INT_NUMBER: SyntaxKind = SyntaxKind(4); pub const INT_NUMBER: SyntaxKind = SyntaxKind(4);
pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5); pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5);
// Punctuation token kinds. Each numeric id equals the position of the
// same-named entry in the `INFOS` table that follows these constants, so the
// two lists must stay in the same order.
// The trailing comment on each line shows the source text the lexer maps to
// that kind.
pub const SEMI: SyntaxKind = SyntaxKind(6); // `;`
pub const COMMA: SyntaxKind = SyntaxKind(7); // `,`
pub const DOT: SyntaxKind = SyntaxKind(8); // `.`
pub const DOTDOT: SyntaxKind = SyntaxKind(9); // `..`
pub const DOTDOTDOT: SyntaxKind = SyntaxKind(10); // `...`
pub const DOTDOTEQ: SyntaxKind = SyntaxKind(11); // `..=`
pub const L_PAREN: SyntaxKind = SyntaxKind(12); // `(`
pub const R_PAREN: SyntaxKind = SyntaxKind(13); // `)`
pub const L_CURLY: SyntaxKind = SyntaxKind(14); // `{`
pub const R_CURLY: SyntaxKind = SyntaxKind(15); // `}`
pub const L_BRACK: SyntaxKind = SyntaxKind(16); // `[`
pub const R_BRACK: SyntaxKind = SyntaxKind(17); // `]`
pub const AT: SyntaxKind = SyntaxKind(18); // `@`
pub const POUND: SyntaxKind = SyntaxKind(19); // `#`
pub const TILDE: SyntaxKind = SyntaxKind(20); // `~`
pub const QUESTION: SyntaxKind = SyntaxKind(21); // `?`
pub const COLON: SyntaxKind = SyntaxKind(22); // `:`
pub const COLONCOLON: SyntaxKind = SyntaxKind(23); // `::`
pub const DOLLAR: SyntaxKind = SyntaxKind(24); // `$`
static INFOS: [SyntaxInfo; 6] = [ static INFOS: [SyntaxInfo; 25] = [
SyntaxInfo { name: "ERROR" }, SyntaxInfo { name: "ERROR" },
SyntaxInfo { name: "IDENT" }, SyntaxInfo { name: "IDENT" },
SyntaxInfo { name: "UNDERSCORE" }, SyntaxInfo { name: "UNDERSCORE" },
SyntaxInfo { name: "WHITESPACE" }, SyntaxInfo { name: "WHITESPACE" },
SyntaxInfo { name: "INT_NUMBER" }, SyntaxInfo { name: "INT_NUMBER" },
SyntaxInfo { name: "FLOAT_NUMBER" }, SyntaxInfo { name: "FLOAT_NUMBER" },
SyntaxInfo { name: "SEMI" },
SyntaxInfo { name: "COMMA" },
SyntaxInfo { name: "DOT" },
SyntaxInfo { name: "DOTDOT" },
SyntaxInfo { name: "DOTDOTDOT" },
SyntaxInfo { name: "DOTDOTEQ" },
SyntaxInfo { name: "L_PAREN" },
SyntaxInfo { name: "R_PAREN" },
SyntaxInfo { name: "L_CURLY" },
SyntaxInfo { name: "R_CURLY" },
SyntaxInfo { name: "L_BRACK" },
SyntaxInfo { name: "R_BRACK" },
SyntaxInfo { name: "AT" },
SyntaxInfo { name: "POUND" },
SyntaxInfo { name: "TILDE" },
SyntaxInfo { name: "QUESTION" },
SyntaxInfo { name: "COLON" },
SyntaxInfo { name: "COLONCOLON" },
SyntaxInfo { name: "DOLLAR" },
]; ];
pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {

View file

@ -38,22 +38,21 @@ WHITESPACE 1 " "
INT_NUMBER 6 "0E1279" INT_NUMBER 6 "0E1279"
WHITESPACE 1 "\n" WHITESPACE 1 "\n"
INT_NUMBER 1 "0" INT_NUMBER 1 "0"
ERROR 1 "." DOTDOT 2 ".."
ERROR 1 "."
INT_NUMBER 1 "2" INT_NUMBER 1 "2"
WHITESPACE 1 "\n" WHITESPACE 1 "\n"
INT_NUMBER 1 "0" INT_NUMBER 1 "0"
ERROR 1 "." DOT 1 "."
IDENT 3 "foo" IDENT 3 "foo"
ERROR 1 "(" L_PAREN 1 "("
ERROR 1 ")" R_PAREN 1 ")"
WHITESPACE 1 "\n" WHITESPACE 1 "\n"
INT_NUMBER 2 "0e" INT_NUMBER 2 "0e"
ERROR 1 "+" ERROR 1 "+"
INT_NUMBER 1 "1" INT_NUMBER 1 "1"
WHITESPACE 1 "\n" WHITESPACE 1 "\n"
INT_NUMBER 1 "0" INT_NUMBER 1 "0"
ERROR 1 "." DOT 1 "."
IDENT 1 "e" IDENT 1 "e"
ERROR 1 "+" ERROR 1 "+"
INT_NUMBER 1 "1" INT_NUMBER 1 "1"

View file

@ -0,0 +1,3 @@
; , ( ) { } [ ] @ # ~ ? $
. .. ... ..=
: ::

View file

@ -0,0 +1,37 @@
SEMI 1 ";"
WHITESPACE 1 " "
COMMA 1 ","
WHITESPACE 1 " "
L_PAREN 1 "("
WHITESPACE 1 " "
R_PAREN 1 ")"
WHITESPACE 1 " "
L_CURLY 1 "{"
WHITESPACE 1 " "
R_CURLY 1 "}"
WHITESPACE 1 " "
L_BRACK 1 "["
WHITESPACE 1 " "
R_BRACK 1 "]"
WHITESPACE 1 " "
AT 1 "@"
WHITESPACE 1 " "
POUND 1 "#"
WHITESPACE 1 " "
TILDE 1 "~"
WHITESPACE 1 " "
QUESTION 1 "?"
WHITESPACE 1 " "
DOLLAR 1 "$"
WHITESPACE 1 "\n"
DOT 1 "."
WHITESPACE 1 " "
DOTDOT 2 ".."
WHITESPACE 1 " "
DOTDOTDOT 3 "..."
WHITESPACE 1 " "
DOTDOTEQ 3 "..="
WHITESPACE 1 "\n"
COLON 1 ":"
WHITESPACE 1 " "
COLONCOLON 2 "::"