diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 2f8d3a402c..65a9943279 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -17,6 +17,7 @@ use self::strings::{is_string_literal_start, scan_byte_char_or_string, scan_char mod comments; use self::comments::{scan_comment, scan_shebang}; +/// Break a string up into its component tokens pub fn tokenize(text: &str) -> Vec { let mut text = text; let mut acc = Vec::new(); @@ -28,6 +29,7 @@ pub fn tokenize(text: &str) -> Vec { } acc } +/// Get the next token from a string pub fn next_token(text: &str) -> Token { assert!(!text.is_empty()); let mut ptr = Ptr::new(text); diff --git a/src/lib.rs b/src/lib.rs index 39b01a1cb3..87a9d11eaf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,20 @@ +//! An experimental implementation of [Rust RFC#2256 libsyntax2.0][rfc#2256]. +//! +//! The intent is to be an IDE-ready parser, i.e. one that offers +//! +//! - easy and fast incremental re-parsing, +//! - graceful handling of errors, and +//! - preservation of all information in the source file. +//! +//! For more information, see [the RFC][rfc#2256], or [the working draft][RFC.md]. +//! +//! [rfc#2256]: +//! [RFC.md]: + +#![forbid(missing_debug_implementations, unconditional_recursion, future_incompatible)] +#![deny(bad_style, unsafe_code, missing_docs)] +//#![warn(unreachable_pub)] // rust-lang/rust#47816 + extern crate unicode_xid; mod text; @@ -6,17 +23,20 @@ mod lexer; mod parser; #[cfg_attr(rustfmt, rustfmt_skip)] +#[allow(missing_docs)] pub mod syntax_kinds; pub use text::{TextRange, TextUnit}; pub use tree::{File, FileBuilder, Node, Sink, SyntaxKind, Token}; pub use lexer::{next_token, tokenize}; pub use parser::parse; +/// Utilities for simple uses of the parser. pub mod utils { use std::fmt::Write; use {File, Node}; + /// Create a string representation of a file's parse tree. 
pub fn dump_tree(file: &File) -> String { let mut result = String::new(); go(file.root(), &mut result, 0); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d04ed1e75c..0f8f2ce0cc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6,6 +6,7 @@ use tree::TOMBSTONE; mod event_parser; use self::event_parser::Event; +/// Parse a sequence of tokens into the representative node tree pub fn parse(text: String, tokens: &[Token]) -> File { let events = event_parser::parse(&text, tokens); from_events_to_file(text, tokens, events) diff --git a/src/text.rs b/src/text.rs index ac1a54a758..4084bf44e8 100644 --- a/src/text.rs +++ b/src/text.rs @@ -1,14 +1,17 @@ use std::fmt; use std::ops; +/// A text position in a source file #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct TextUnit(u32); impl TextUnit { + /// The positional offset required for one character pub fn len_of_char(c: char) -> TextUnit { TextUnit(c.len_utf8() as u32) } + #[allow(missing_docs)] pub fn new(val: u32) -> TextUnit { TextUnit(val) } @@ -64,6 +67,7 @@ impl ops::SubAssign for TextUnit { } } +/// A range of text in a source file #[derive(Clone, Copy, PartialEq, Eq)] pub struct TextRange { start: TextUnit, @@ -83,10 +87,12 @@ impl fmt::Display for TextRange { } impl TextRange { + /// A length-0 range of text pub fn empty() -> TextRange { TextRange::from_to(TextUnit::new(0), TextUnit::new(0)) } + /// The left-inclusive range (`[from..to)`) between two points in the text pub fn from_to(from: TextUnit, to: TextUnit) -> TextRange { assert!(from <= to, "Invalid text range [{}; {})", from, to); TextRange { @@ -95,22 +101,27 @@ impl TextRange { } } + /// The range from some point over some length pub fn from_len(from: TextUnit, len: TextUnit) -> TextRange { TextRange::from_to(from, from + len) } + /// The starting position of this range pub fn start(&self) -> TextUnit { self.start } + /// The end position of this range pub fn end(&self) -> TextUnit { self.end } + /// The length of 
this range pub fn len(&self) -> TextUnit { self.end - self.start } + /// Is this range empty of any content? pub fn is_empty(&self) -> bool { self.start() == self.end() } diff --git a/src/tree/file_builder.rs b/src/tree/file_builder.rs index 939922cb2a..738705f022 100644 --- a/src/tree/file_builder.rs +++ b/src/tree/file_builder.rs @@ -1,3 +1,6 @@ +// FIXME(CAD97): I don't understand this mod well enough to stub out docs for the public symbols yet +#![allow(missing_docs)] + use {SyntaxKind, TextRange, TextUnit}; use super::{File, NodeData, NodeIdx, SyntaxErrorData}; @@ -8,6 +11,7 @@ pub trait Sink { fn error(&mut self) -> ErrorBuilder; } +#[derive(Debug)] pub struct FileBuilder { text: String, nodes: Vec, @@ -139,6 +143,7 @@ fn grow(left: &mut TextRange, right: TextRange) { *left = TextRange::from_to(left.start(), right.end()) } +#[derive(Debug)] pub struct ErrorBuilder<'f> { message: Option, builder: &'f mut FileBuilder, diff --git a/src/tree/mod.rs b/src/tree/mod.rs index a330caf54f..aaf048c734 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -7,6 +7,7 @@ use std::cmp; mod file_builder; pub use self::file_builder::{FileBuilder, Sink}; +/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct SyntaxKind(pub(crate) u32); @@ -37,12 +38,17 @@ pub(crate) struct SyntaxInfo { pub name: &'static str, } +/// A token of Rust source. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Token { + /// The kind of token. pub kind: SyntaxKind, + /// The length of the token. pub len: TextUnit, } +/// The contents of a Rust source file. +#[derive(Debug)] pub struct File { text: String, nodes: Vec, @@ -50,6 +56,7 @@ pub struct File { } impl File { + /// The root node of this source file. pub fn root<'f>(&'f self) -> Node<'f> { assert!(!self.nodes.is_empty()); Node { @@ -59,6 +66,7 @@ impl File { } } +/// A reference to a token in a Rust source file. 
#[derive(Clone, Copy)] pub struct Node<'f> { file: &'f File, @@ -66,28 +74,34 @@ pub struct Node<'f> { } impl<'f> Node<'f> { + /// The kind of the token at this node. pub fn kind(&self) -> SyntaxKind { self.data().kind } + /// The text range covered by the token at this node. pub fn range(&self) -> TextRange { self.data().range } + /// The text at this node. pub fn text(&self) -> &'f str { &self.file.text.as_str()[self.range()] } + /// The parent node to this node. pub fn parent(&self) -> Option> { self.as_node(self.data().parent) } + /// The children nodes of this node. pub fn children(&self) -> Children<'f> { Children { next: self.as_node(self.data().first_child), } } + /// Any errors contained in this node. pub fn errors(&self) -> SyntaxErrors<'f> { let pos = self.file.errors.iter().position(|e| e.node == self.idx); let next = pos.map(|i| ErrorIdx(i as u32)).map(|idx| SyntaxError { @@ -123,7 +137,7 @@ impl<'f> cmp::PartialEq> for Node<'f> { impl<'f> cmp::Eq for Node<'f> {} -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub struct SyntaxError<'f> { file: &'f File, idx: ErrorIdx, @@ -162,6 +176,7 @@ impl<'f> SyntaxError<'f> { } } +#[derive(Debug)] pub struct Children<'f> { next: Option>, } @@ -176,6 +191,7 @@ impl<'f> Iterator for Children<'f> { } } +#[derive(Debug)] pub struct SyntaxErrors<'f> { next: Option>, } @@ -190,9 +206,10 @@ impl<'f> Iterator for SyntaxErrors<'f> { } } -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] struct NodeIdx(u32); +#[derive(Debug)] struct NodeData { kind: SyntaxKind, range: TextRange, @@ -215,9 +232,10 @@ impl ::std::ops::IndexMut for Vec { } } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] struct ErrorIdx(u32); +#[derive(Debug)] struct SyntaxErrorData { node: NodeIdx, message: String,