2021-12-12 16:06:40 +00:00
|
|
|
//! Input for the parser -- a sequence of tokens.
//!
//! As of now, the parser doesn't have access to the *text* of the tokens, and
//! makes decisions based solely on their classification.
|
|
|
|
|
2021-12-12 14:58:45 +00:00
|
|
|
use crate::SyntaxKind;
|
2021-11-14 13:47:13 +00:00
|
|
|
|
2021-11-14 19:13:44 +00:00
|
|
|
/// Machine word used to store per-token "joint" flags, one bit per token.
///
/// All word/bit arithmetic in this file is written in terms of `bits::BITS`,
/// so the word width is decided here and nowhere else.
// The alias is deliberately lower-case (presumably to mirror the primitive
// integer type names), hence the lint is silenced for this one item.
#[allow(non_camel_case_types)]
type bits = u64;
|
|
|
|
|
|
|
|
/// Main input to the parser.
|
|
|
|
///
|
|
|
|
/// A sequence of tokens represented internally as a struct of arrays.
|
|
|
|
#[derive(Default)]
|
|
|
|
pub struct Tokens {
|
|
|
|
kind: Vec<SyntaxKind>,
|
|
|
|
joint: Vec<bits>,
|
2021-11-14 19:13:44 +00:00
|
|
|
contextual_kw: Vec<SyntaxKind>,
|
2021-11-14 13:47:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Tokens {
|
2021-12-12 16:17:04 +00:00
|
|
|
#[inline]
|
2021-12-12 15:38:49 +00:00
|
|
|
pub fn push(&mut self, kind: SyntaxKind) {
|
|
|
|
self.push_impl(kind, SyntaxKind::EOF)
|
|
|
|
}
|
2021-12-12 16:06:40 +00:00
|
|
|
/// Sets jointness for the last token we've pushed.
|
|
|
|
///
|
|
|
|
/// This is a separate API rather than an argument to the `push` to make it
|
|
|
|
/// convenient both for textual and mbe tokens. With text, you know whether
|
|
|
|
/// the *previous* token was joint, with mbe, you know whether the *current*
|
|
|
|
/// one is joint. This API allows for styles of usage:
|
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// // In text:
|
|
|
|
/// tokens.was_joint(prev_joint);
|
|
|
|
/// tokens.push(curr);
|
|
|
|
///
|
|
|
|
/// // In MBE:
|
|
|
|
/// token.push(curr);
|
|
|
|
/// tokens.push(curr_joint)
|
|
|
|
/// ```
|
2021-12-12 16:17:04 +00:00
|
|
|
#[inline]
|
2021-12-12 16:22:37 +00:00
|
|
|
pub fn was_joint(&mut self) {
|
|
|
|
self.set_joint(self.len() - 1);
|
2021-11-14 13:47:13 +00:00
|
|
|
}
|
2021-12-12 16:17:04 +00:00
|
|
|
#[inline]
|
2021-11-14 19:13:44 +00:00
|
|
|
pub fn push_ident(&mut self, contextual_kw: SyntaxKind) {
|
2021-12-12 15:38:49 +00:00
|
|
|
self.push_impl(SyntaxKind::IDENT, contextual_kw)
|
2021-11-14 13:47:13 +00:00
|
|
|
}
|
2021-12-12 16:17:04 +00:00
|
|
|
#[inline]
|
2021-12-12 15:38:49 +00:00
|
|
|
fn push_impl(&mut self, kind: SyntaxKind, contextual_kw: SyntaxKind) {
|
2021-11-14 13:47:13 +00:00
|
|
|
let idx = self.len();
|
|
|
|
if idx % (bits::BITS as usize) == 0 {
|
|
|
|
self.joint.push(0);
|
|
|
|
}
|
|
|
|
self.kind.push(kind);
|
2021-11-14 19:13:44 +00:00
|
|
|
self.contextual_kw.push(contextual_kw);
|
2021-11-14 13:47:13 +00:00
|
|
|
}
|
|
|
|
fn set_joint(&mut self, n: usize) {
|
|
|
|
let (idx, b_idx) = self.bit_index(n);
|
|
|
|
self.joint[idx] |= 1 << b_idx;
|
|
|
|
}
|
|
|
|
fn bit_index(&self, n: usize) -> (usize, usize) {
|
|
|
|
let idx = n / (bits::BITS as usize);
|
|
|
|
let b_idx = n % (bits::BITS as usize);
|
|
|
|
(idx, b_idx)
|
|
|
|
}
|
|
|
|
|
2021-12-12 16:17:04 +00:00
|
|
|
fn len(&self) -> usize {
|
2021-11-14 13:47:13 +00:00
|
|
|
self.kind.len()
|
|
|
|
}
|
2021-12-12 16:31:32 +00:00
|
|
|
}
|
2021-11-14 18:37:10 +00:00
|
|
|
|
2021-12-12 16:31:32 +00:00
|
|
|
/// pub(crate) impl used by the parser.
|
|
|
|
impl Tokens {
|
|
|
|
pub(crate) fn kind(&self, idx: usize) -> SyntaxKind {
|
|
|
|
self.kind.get(idx).copied().unwrap_or(SyntaxKind::EOF)
|
|
|
|
}
|
|
|
|
pub(crate) fn contextual_kind(&self, idx: usize) -> SyntaxKind {
|
|
|
|
self.contextual_kw.get(idx).copied().unwrap_or(SyntaxKind::EOF)
|
|
|
|
}
|
|
|
|
pub(crate) fn is_joint(&self, n: usize) -> bool {
|
|
|
|
let (idx, b_idx) = self.bit_index(n);
|
|
|
|
self.joint[idx] & 1 << b_idx != 0
|
2021-11-14 13:47:13 +00:00
|
|
|
}
|
|
|
|
}
|