parser/
output.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
//! See [`Output`]

use crate::SyntaxKind;

/// Output of the parser -- a DFS traversal of a concrete syntax tree.
///
/// Use the [`Output::iter`] method to iterate over traversal steps and consume
/// a syntax tree.
///
/// In a sense, this is just a sequence of [`SyntaxKind`]-colored parenthesis
/// interspersed into the original [`crate::Input`]. The output is fundamentally
/// coordinated with the input and `n_input_tokens` refers to the number of
/// times [`crate::Input::push`] was called.
#[derive(Default)]
pub struct Output {
    /// 32-bit encoding of events. If LSB is zero, then that's an index into the
    /// error vector. Otherwise, it's one of the thee other variants, with data encoded as
    ///
    ///     |16 bit kind|8 bit n_input_tokens|4 bit tag|4 bit leftover|
    ///
    event: Vec<u32>,
    error: Vec<String>,
}

#[derive(Debug)]
pub enum Step<'a> {
    Token { kind: SyntaxKind, n_input_tokens: u8 },
    FloatSplit { ends_in_dot: bool },
    Enter { kind: SyntaxKind },
    Exit,
    Error { msg: &'a str },
}

impl Output {
    const EVENT_MASK: u32 = 0b1;
    const TAG_MASK: u32 = 0x0000_00F0;
    const N_INPUT_TOKEN_MASK: u32 = 0x0000_FF00;
    const KIND_MASK: u32 = 0xFFFF_0000;

    const ERROR_SHIFT: u32 = Self::EVENT_MASK.trailing_ones();
    const TAG_SHIFT: u32 = Self::TAG_MASK.trailing_zeros();
    const N_INPUT_TOKEN_SHIFT: u32 = Self::N_INPUT_TOKEN_MASK.trailing_zeros();
    const KIND_SHIFT: u32 = Self::KIND_MASK.trailing_zeros();

    const TOKEN_EVENT: u8 = 0;
    const ENTER_EVENT: u8 = 1;
    const EXIT_EVENT: u8 = 2;
    const SPLIT_EVENT: u8 = 3;

    pub fn iter(&self) -> impl Iterator<Item = Step<'_>> {
        self.event.iter().map(|&event| {
            if event & Self::EVENT_MASK == 0 {
                return Step::Error {
                    msg: self.error[(event as usize) >> Self::ERROR_SHIFT].as_str(),
                };
            }
            let tag = ((event & Self::TAG_MASK) >> Self::TAG_SHIFT) as u8;
            match tag {
                Self::TOKEN_EVENT => {
                    let kind: SyntaxKind =
                        (((event & Self::KIND_MASK) >> Self::KIND_SHIFT) as u16).into();
                    let n_input_tokens =
                        ((event & Self::N_INPUT_TOKEN_MASK) >> Self::N_INPUT_TOKEN_SHIFT) as u8;
                    Step::Token { kind, n_input_tokens }
                }
                Self::ENTER_EVENT => {
                    let kind: SyntaxKind =
                        (((event & Self::KIND_MASK) >> Self::KIND_SHIFT) as u16).into();
                    Step::Enter { kind }
                }
                Self::EXIT_EVENT => Step::Exit,
                Self::SPLIT_EVENT => {
                    Step::FloatSplit { ends_in_dot: event & Self::N_INPUT_TOKEN_MASK != 0 }
                }
                _ => unreachable!(),
            }
        })
    }

    pub(crate) fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
        let e = ((kind as u16 as u32) << Self::KIND_SHIFT)
            | ((n_tokens as u32) << Self::N_INPUT_TOKEN_SHIFT)
            | Self::EVENT_MASK;
        self.event.push(e)
    }

    pub(crate) fn float_split_hack(&mut self, ends_in_dot: bool) {
        let e = ((Self::SPLIT_EVENT as u32) << Self::TAG_SHIFT)
            | ((ends_in_dot as u32) << Self::N_INPUT_TOKEN_SHIFT)
            | Self::EVENT_MASK;
        self.event.push(e);
    }

    pub(crate) fn enter_node(&mut self, kind: SyntaxKind) {
        let e = ((kind as u16 as u32) << Self::KIND_SHIFT)
            | ((Self::ENTER_EVENT as u32) << Self::TAG_SHIFT)
            | Self::EVENT_MASK;
        self.event.push(e)
    }

    pub(crate) fn leave_node(&mut self) {
        let e = ((Self::EXIT_EVENT as u32) << Self::TAG_SHIFT) | Self::EVENT_MASK;
        self.event.push(e)
    }

    pub(crate) fn error(&mut self, error: String) {
        let idx = self.error.len();
        self.error.push(error);
        let e = (idx as u32) << Self::ERROR_SHIFT;
        self.event.push(e);
    }
}