diff --git a/src/parse_tree.rs b/src/parse_tree.rs
index cac3c5414..eae46c362 100644
--- a/src/parse_tree.rs
+++ b/src/parse_tree.rs
@@ -12,6 +12,7 @@ use crate::parse_constants::{
 };
 use crate::tokenizer::TokenizerError;
 use crate::wchar::prelude::*;
+use crate::wcstringutil::count_newlines;
 
 /// A struct representing the token type that we use internally.
 #[derive(Clone, Copy)]
@@ -114,6 +115,16 @@ impl ParsedSource {
     pub fn new(src: WString, ast: Ast) -> Self {
         ParsedSource { src, ast }
     }
+
+    /// Return a line counter over this source, with no node set and an empty cache.
+    pub fn line_counter(self: &Arc) -> LineCounter {
+        LineCounter {
+            parsed_source: Pin::new(Arc::clone(self)),
+            node: std::ptr::null(),
+            cached_offset: 0,
+            cached_count: 0,
+        }
+    }
 }
 
 pub type ParsedSourceRef = Arc;
@@ -182,3 +193,78 @@ pub fn parse_source(
         Some(Arc::new(ParsedSource::new(src, ast)))
     }
 }
+
+/// A type which assists in returning line numbers.
+/// This is a somewhat strange type which both counts line numbers and also holds
+/// a reference to a "current" node; this matches the expected usage from parse_execution.
+pub struct LineCounter {
+    /// The parse tree containing the node.
+    /// This is pinned because we hold a pointer into it.
+    parsed_source: Pin>, // NOTE(review): type arguments look stripped here — confirm upstream.
+
+    /// The node itself. This points into the parsed source, or it may be null.
+    node: *const NodeType,
+
+    // Line-count cache: a character offset into the source, and the number of newlines before it.
+    cached_offset: usize,
+    cached_count: usize,
+}
+
+impl LineCounter {
+    /// Return a line counter over empty source, with no node set.
+    pub fn empty() -> Self {
+        let parsed_source =
+            Pin::new(parse_source(WString::new(), ParseTreeFlags::default(), None).unwrap());
+        LineCounter {
+            parsed_source,
+            node: std::ptr::null(),
+            cached_offset: 0,
+            cached_count: 0,
+        }
+    }
+
+    /// Count the newlines before `offset`, leveraging our cache. Panics on an out-of-range offset.
+    pub fn line_offset_of_character_at_offset(&mut self, offset: usize) -> usize {
+        let src = &self.parsed_source.src;
+        assert!(offset <= src.len());
+
+        // Offset 0 has nothing before it, so answer 0 without touching the cache.
+        if offset == 0 {
+            return 0;
+        }
+
+        // We want to return the number of newlines at offsets less than the given offset.
+        if offset > self.cached_offset {
+            // Moving forward: add one for every newline in the range [cached_offset, offset).
+            // The codegen is substantially better when using a char slice than the char iterator.
+            self.cached_count += count_newlines(&src[self.cached_offset..offset]);
+        } else if offset < self.cached_offset {
+            // Moving backward: subtract one for every newline in the range [offset, cached_offset).
+            self.cached_count -= count_newlines(&src[offset..self.cached_offset]);
+        }
+        self.cached_offset = offset;
+        self.cached_count
+    }
+
+    /// Returns the 0-based line number of the node, or `None` if its offset is unavailable.
+    pub fn line_offset_of_node(&mut self) -> Option {
+        let src_offset = self.source_offset_of_node()?;
+        Some(self.line_offset_of_character_at_offset(src_offset))
+    }
+
+    /// Return the 0-based character offset of the node, if a node is set and has a source range.
+    pub fn source_offset_of_node(&mut self) -> Option {
+        // SAFETY: relies on set_node's contract that the node belongs to `parsed_source`.
+        let node = unsafe { self.node.as_ref()? };
+        let range = node.try_source_range()?;
+        Some(range.start())
+    }
+
+    /// Set the node (or clear it with `None`). The node must belong to the parsed source.
+    /// Returns the previously set node, if any.
+    pub fn set_node<'a>(&mut self, node: Option<&'a NodeType>) -> Option<&'a NodeType> {
+        let node_ptr = node.map_or(std::ptr::null(), |node| node);
+        let prev = std::mem::replace(&mut self.node, node_ptr);
+        unsafe { prev.as_ref() } // SAFETY: null or set earlier; assumed valid for 'a (unchecked).
+    }
+}
diff --git a/src/tests/parser.rs b/src/tests/parser.rs
index f8ed04a17..9611de929 100644
--- a/src/tests/parser.rs
+++ b/src/tests/parser.rs
@@ -1,4 +1,4 @@
-use crate::ast::{self, Ast, List, Node, Traversal};
+use crate::ast::{self, Ast, JobPipeline, List, Node, Traversal};
 use crate::common::ScopeGuard;
 use crate::env::EnvStack;
 use crate::expand::ExpandFlags;
@@ -6,6 +6,7 @@ use crate::io::{IoBufferfill, IoChain};
 use crate::parse_constants::{
     ParseErrorCode, ParseTreeFlags, ParserTestErrorBits, StatementDecoration,
 };
+use crate::parse_tree::{parse_source, LineCounter};
 use crate::parse_util::{parse_util_detect_errors, parse_util_detect_errors_in_argument};
 use crate::parser::{CancelBehavior, Parser};
 use crate::reader::{reader_pop, reader_push, reader_reset_interrupted, ReaderConfig};
@@ -747,3 +748,74 @@ fn test_cancellation() {
     reader_reset_interrupted();
     signal_clear_cancel();
 }
+
+#[test]
+fn test_line_counter() {
+    let src = L!("echo line1; echo still_line_1;\n\necho line3");
+    let ps = parse_source(src.to_owned(), ParseTreeFlags::default(), None)
+        .expect("Failed to parse source");
+    assert!(!ps.ast.errored());
+    let mut line_counter = ps.line_counter();
+
+    // Walk every character offset forwards, then backwards, so both cache directions are hit.
+    let mut expected = 0;
+    for (idx, c) in src.chars().enumerate() {
+        let line_offset = line_counter.line_offset_of_character_at_offset(idx);
+        assert_eq!(line_offset, expected);
+        if c == '\n' {
+            expected += 1;
+        }
+    }
+    for (idx, c) in src.chars().enumerate().rev() {
+        if c == '\n' {
+            expected -= 1;
+        }
+        let line_offset = line_counter.line_offset_of_character_at_offset(idx);
+        assert_eq!(line_offset, expected);
+    }
+
+    fn ref_eq(a: Option<&T>, b: Option<&T>) -> bool {
+        match (a, b) {
+            (Some(a), Some(b)) => std::ptr::eq(a, b),
+            (None, None) => true,
+            _ => false,
+        }
+    }
+
+    let pipelines: Vec<_> = ps.ast.walk().filter_map(|n| n.as_job_pipeline()).collect();
+    assert_eq!(pipelines.len(), 3);
+    let src_offsets = [0, 0, 2]; // Expected 0-based line offset of each pipeline.
+    assert_eq!(line_counter.source_offset_of_node(), None);
+    assert_eq!(line_counter.line_offset_of_node(), None);
+
+    let mut last_set = None; // Node most recently passed to set_node.
+    for (idx, &node) in pipelines.iter().enumerate() {
+        let orig = line_counter.set_node(Some(node));
+        assert!(ref_eq(orig, last_set));
+        last_set = Some(node);
+        assert_eq!(
+            line_counter.source_offset_of_node(),
+            Some(node.source_range().start())
+        );
+        assert_eq!(line_counter.line_offset_of_node(), Some(src_offsets[idx]));
+    }
+
+    for (idx, &node) in pipelines.iter().enumerate().rev() {
+        let orig = line_counter.set_node(Some(node));
+        assert!(ref_eq(orig, last_set));
+        last_set = Some(node);
+        assert_eq!(
+            line_counter.source_offset_of_node(),
+            Some(node.source_range().start())
+        );
+        assert_eq!(line_counter.line_offset_of_node(), Some(src_offsets[idx]));
+    }
+}
+
+#[test]
+fn test_line_counter_empty() {
+    let mut line_counter = LineCounter::::empty(); // NOTE(review): type argument looks stripped.
+    assert_eq!(line_counter.line_offset_of_character_at_offset(0), 0);
+    assert_eq!(line_counter.line_offset_of_node(), None);
+    assert_eq!(line_counter.source_offset_of_node(), None);
+}