diff --git a/src/lexer/ptr.rs b/src/lexer/ptr.rs index d441b826bc..b380117e63 100644 --- a/src/lexer/ptr.rs +++ b/src/lexer/ptr.rs @@ -56,6 +56,7 @@ impl<'s> Ptr<'s> { } fn chars(&self) -> Chars { - self.text[self.len.0 as usize ..].chars() + let len: u32 = self.len.into(); + self.text[len as usize ..].chars() } } diff --git a/src/text.rs b/src/text.rs index 31e67b4560..c3ef1ac8ea 100644 --- a/src/text.rs +++ b/src/text.rs @@ -2,9 +2,7 @@ use std::fmt; use std::ops; #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct TextUnit( - pub(crate) u32 -); +pub struct TextUnit(u32); impl TextUnit { pub fn len_of_char(c: char) -> TextUnit { diff --git a/tests/data/lexer/0001_hello.txt b/tests/data/lexer/0001_hello.txt index e0b6a1f109..27a5940a9c 100644 --- a/tests/data/lexer/0001_hello.txt +++ b/tests/data/lexer/0001_hello.txt @@ -1,3 +1,3 @@ -IDENT 5 -WHITESPACE 1 -IDENT 5 +IDENT 5 "hello" +WHITESPACE 1 " " +IDENT 5 "world" diff --git a/tests/data/lexer/0002_whitespace.txt b/tests/data/lexer/0002_whitespace.txt index 4b9885e4a2..01d260918e 100644 --- a/tests/data/lexer/0002_whitespace.txt +++ b/tests/data/lexer/0002_whitespace.txt @@ -1,12 +1,12 @@ -IDENT 1 -WHITESPACE 1 -IDENT 1 -WHITESPACE 2 -IDENT 1 -WHITESPACE 1 -IDENT 1 -WHITESPACE 2 -IDENT 1 -WHITESPACE 1 -IDENT 1 -WHITESPACE 1 +IDENT 1 "a" +WHITESPACE 1 " " +IDENT 1 "b" +WHITESPACE 2 " " +IDENT 1 "c" +WHITESPACE 1 "\n" +IDENT 1 "d" +WHITESPACE 2 "\n\n" +IDENT 1 "e" +WHITESPACE 1 "\t" +IDENT 1 "f" +WHITESPACE 1 "\n" diff --git a/tests/data/lexer/0003_ident.txt b/tests/data/lexer/0003_ident.txt index eec82fb91d..4a0d5c0531 100644 --- a/tests/data/lexer/0003_ident.txt +++ b/tests/data/lexer/0003_ident.txt @@ -1,14 +1,14 @@ -IDENT 3 -WHITESPACE 1 -IDENT 4 -WHITESPACE 1 -IDENT 4 -WHITESPACE 1 -UNDERSCORE 1 -WHITESPACE 1 -IDENT 2 -WHITESPACE 1 -IDENT 1 -WHITESPACE 1 -IDENT 12 -WHITESPACE 1 +IDENT 3 "foo" +WHITESPACE 1 " " +IDENT 4 "foo_" +WHITESPACE 1 " " +IDENT 4 "_foo" +WHITESPACE 1 " " +UNDERSCORE 1 "_" +WHITESPACE 1 " " +IDENT 2 "__" +WHITESPACE 1 " " +IDENT 1 "x" +WHITESPACE 1 " " +IDENT 12 "привет" +WHITESPACE 1 "\n" diff --git a/tests/data/lexer/0004_number.txt b/tests/data/lexer/0004_number.txt index e9ad8410d7..7dedd2cacb 100644 --- a/tests/data/lexer/0004_number.txt +++ b/tests/data/lexer/0004_number.txt @@ -1,62 +1,62 @@ -INT_NUMBER 1 -WHITESPACE 1 -INT_NUMBER 2 -WHITESPACE 1 -INT_NUMBER 2 -WHITESPACE 1 -INT_NUMBER 2 -WHITESPACE 1 -INT_NUMBER 2 -WHITESPACE 1 -INT_NUMBER 2 -WHITESPACE 1 -FLOAT_NUMBER 2 -WHITESPACE 1 -INT_NUMBER 2 -WHITESPACE 1 -INT_NUMBER 2 -WHITESPACE 1 -INT_NUMBER 1 -IDENT 1 -WHITESPACE 1 -INT_NUMBER 5 -WHITESPACE 1 -INT_NUMBER 6 -WHITESPACE 1 -INT_NUMBER 6 -WHITESPACE 1 -INT_NUMBER 18 -WHITESPACE 1 -INT_NUMBER 6 -WHITESPACE 1 -INT_NUMBER 6 -WHITESPACE 1 -FLOAT_NUMBER 6 -WHITESPACE 1 -INT_NUMBER 6 -WHITESPACE 1 -INT_NUMBER 6 -WHITESPACE 1 -INT_NUMBER 1 -ERROR 1 -ERROR 1 -INT_NUMBER 1 -WHITESPACE 1 -INT_NUMBER 1 -ERROR 1 -IDENT 3 -ERROR 1 -ERROR 1 -WHITESPACE 1 -INT_NUMBER 2 -ERROR 1 -INT_NUMBER 1 -WHITESPACE 1 -INT_NUMBER 1 -ERROR 1 -IDENT 1 -ERROR 1 -INT_NUMBER 1 -WHITESPACE 1 -FLOAT_NUMBER 6 -WHITESPACE 1 +INT_NUMBER 1 "0" +WHITESPACE 1 " " +INT_NUMBER 2 "0b" +WHITESPACE 1 " " +INT_NUMBER 2 "0o" +WHITESPACE 1 " " +INT_NUMBER 2 "0x" +WHITESPACE 1 " " +INT_NUMBER 2 "00" +WHITESPACE 1 " " +INT_NUMBER 2 "0_" +WHITESPACE 1 " " +FLOAT_NUMBER 2 "0." +WHITESPACE 1 " " +INT_NUMBER 2 "0e" +WHITESPACE 1 " " +INT_NUMBER 2 "0E" +WHITESPACE 1 " " +INT_NUMBER 1 "0" +IDENT 1 "z" +WHITESPACE 1 "\n" +INT_NUMBER 5 "01790" +WHITESPACE 1 " " +INT_NUMBER 6 "0b1790" +WHITESPACE 1 " " +INT_NUMBER 6 "0o1790" +WHITESPACE 1 " " +INT_NUMBER 18 "0x1790aAbBcCdDeEfF" +WHITESPACE 1 " " +INT_NUMBER 6 "001279" +WHITESPACE 1 " " +INT_NUMBER 6 "0_1279" +WHITESPACE 1 " " +FLOAT_NUMBER 6 "0.1279" +WHITESPACE 1 " " +INT_NUMBER 6 "0e1279" +WHITESPACE 1 " " +INT_NUMBER 6 "0E1279" +WHITESPACE 1 "\n" +INT_NUMBER 1 "0" +ERROR 1 "." +ERROR 1 "." +INT_NUMBER 1 "2" +WHITESPACE 1 "\n" +INT_NUMBER 1 "0" +ERROR 1 "." +IDENT 3 "foo" +ERROR 1 "(" +ERROR 1 ")" +WHITESPACE 1 "\n" +INT_NUMBER 2 "0e" +ERROR 1 "+" +INT_NUMBER 1 "1" +WHITESPACE 1 "\n" +INT_NUMBER 1 "0" +ERROR 1 "." +IDENT 1 "e" +ERROR 1 "+" +INT_NUMBER 1 "1" +WHITESPACE 1 "\n" +FLOAT_NUMBER 6 "0.0E-2" +WHITESPACE 1 "\n" diff --git a/tests/lexer.rs b/tests/lexer.rs index a3c8916b1e..6a9bab66bd 100644 --- a/tests/lexer.rs +++ b/tests/lexer.rs @@ -31,6 +31,7 @@ fn lexer_test_cases() -> Vec { acc.push(path); } } + acc.sort(); acc } @@ -38,7 +39,7 @@ fn lexer_test_case(path: &Path) { let actual = { let text = file::get_text(path).unwrap(); let tokens = tokenize(&text); - dump_tokens(&tokens) + dump_tokens(&tokens, &text) }; let expected = file::get_text(&path.with_extension("txt")).unwrap(); let expected = expected.as_str(); @@ -64,10 +65,15 @@ fn tokenize(text: &str) -> Vec { acc } -fn dump_tokens(tokens: &[Token]) -> String { +fn dump_tokens(tokens: &[Token], text: &str) -> String { let mut acc = String::new(); + let mut offset = 0; for token in tokens { - write!(acc, "{:?} {}\n", token.kind, token.len).unwrap() + let len: u32 = token.len.into(); + let len = len as usize; + let token_text = &text[offset..offset + len]; + offset += len; + write!(acc, "{:?} {} {:?}\n", token.kind, token.len, token_text).unwrap() } acc } \ No newline at end of file