Factor out line counting

This moves the line counting logic from parse_execution into a new type, in
preparation for further refactoring.
This commit is contained in:
Peter Ammon 2024-06-29 13:49:11 -07:00
parent ad1ea94405
commit 300fcfdba7
No known key found for this signature in database
2 changed files with 159 additions and 1 deletions

View file

@ -12,6 +12,7 @@ use crate::parse_constants::{
};
use crate::tokenizer::TokenizerError;
use crate::wchar::prelude::*;
use crate::wcstringutil::count_newlines;
/// A struct representing the token type that we use internally.
#[derive(Clone, Copy)]
@ -114,6 +115,16 @@ impl ParsedSource {
pub fn new(src: WString, ast: Ast) -> Self {
ParsedSource { src, ast }
}
/// Return a [`LineCounter`] over this source.
///
/// The counter holds its own clone of the `Arc`, starts with no current node
/// (a null pointer), and begins with a zeroed newline cache.
pub fn line_counter<NodeType: Node>(self: &Arc<Self>) -> LineCounter<NodeType> {
    let parsed_source = Pin::new(Arc::clone(self));
    LineCounter {
        parsed_source,
        node: std::ptr::null(),
        cached_offset: 0,
        cached_count: 0,
    }
}
}
/// A shared, reference-counted handle to a [`ParsedSource`].
pub type ParsedSourceRef = Arc<ParsedSource>;
@ -182,3 +193,78 @@ pub fn parse_source(
Some(Arc::new(ParsedSource::new(src, ast)))
}
}
/// A type which assists in returning line numbers.
/// This is a somewhat strange type which both counts line numbers and also holds
/// a reference to a "current" node; this matches the expected usage from parse_execution.
///
/// The current node is stored as a raw pointer rather than a borrowed reference; the
/// counter also holds an `Arc` to the `ParsedSource` that the node is expected to live in.
pub struct LineCounter<NodeType: Node> {
/// The parse tree containing the node.
/// This is pinned because we hold a pointer into it.
parsed_source: Pin<Arc<ParsedSource>>,
/// The node itself. This points into the parsed source, or it may be null.
/// A null pointer means that no node is currently set.
node: *const NodeType,
/// Cached newline-count state, so that repeated queries need not rescan from the start.
/// `cached_offset` is the character offset of the most recent nonzero query
/// (initially 0).
cached_offset: usize,
/// The number of newlines in the source at offsets less than `cached_offset`.
cached_count: usize,
}
impl<NodeType: Node> LineCounter<NodeType> {
    /// Return a line counter over empty source, with no current node.
    ///
    /// # Panics
    /// Panics if `parse_source` returns `None` for the empty string.
    pub fn empty() -> Self {
        let empty_source =
            parse_source(WString::new(), ParseTreeFlags::default(), None).unwrap();
        Self {
            parsed_source: Pin::new(empty_source),
            node: std::ptr::null(),
            cached_offset: 0,
            cached_count: 0,
        }
    }

    /// Return the number of newlines found at offsets strictly less than `offset`,
    /// i.e. the 0-based line on which the character at `offset` lives.
    ///
    /// The result of the previous query is cached, so only the characters between
    /// the previously queried offset and `offset` are scanned.
    ///
    /// # Panics
    /// Panics if `offset` is greater than the length of the source.
    pub fn line_offset_of_character_at_offset(&mut self, offset: usize) -> usize {
        let src = &self.parsed_source.src;
        assert!(offset <= src.len());

        // Offset 0 has no preceding characters; answer directly and leave the cache alone.
        if offset == 0 {
            return 0;
        }

        let cached = self.cached_offset;
        match offset.cmp(&cached) {
            // Moving forwards: add the newlines in [cached, offset).
            // Slices are used here because the codegen is substantially better than
            // with the char iterator.
            std::cmp::Ordering::Greater => {
                self.cached_count += count_newlines(&src[cached..offset]);
            }
            // Moving backwards: remove the newlines in [offset, cached).
            std::cmp::Ordering::Less => {
                self.cached_count -= count_newlines(&src[offset..cached]);
            }
            // Same offset as last time: the cached count is already correct.
            std::cmp::Ordering::Equal => {}
        }
        self.cached_offset = offset;
        self.cached_count
    }

    /// Return the 0-based line number of the current node, or `None` if there is
    /// no current node or the node has no source range.
    pub fn line_offset_of_node(&mut self) -> Option<usize> {
        self.source_offset_of_node()
            .map(|start| self.line_offset_of_character_at_offset(start))
    }

    /// Return the 0-based character offset of the start of the current node, or
    /// `None` if there is no current node or the node has no source range.
    pub fn source_offset_of_node(&mut self) -> Option<usize> {
        // SAFETY: `node` is either null or was installed through `set_node`, whose
        // contract requires the node to belong to `parsed_source`; this counter keeps
        // that source alive, so the node is valid for as long as the source is.
        let current = unsafe { self.node.as_ref() }?;
        current.try_source_range().map(|range| range.start())
    }

    /// Set the current node, returning the node that was previously set (if any).
    ///
    /// The node must belong to the parsed source held by this counter. Passing
    /// `None` clears the current node.
    ///
    /// NOTE(review): the returned reference carries the caller-chosen lifetime `'a`
    /// rather than one tied to the previously stored node; callers should only rely
    /// on it while the parsed source is alive -- confirm this is acceptable at call sites.
    pub fn set_node<'a>(&mut self, node: Option<&'a NodeType>) -> Option<&'a NodeType> {
        let incoming: *const NodeType = match node {
            Some(new_node) => new_node as *const NodeType,
            None => std::ptr::null(),
        };
        let outgoing = std::mem::replace(&mut self.node, incoming);
        // SAFETY: the previous pointer is either null or was installed by an earlier
        // `set_node` call, and so points into the parsed source kept alive by `self`.
        unsafe { outgoing.as_ref() }
    }
}

View file

@ -1,4 +1,4 @@
use crate::ast::{self, Ast, List, Node, Traversal};
use crate::ast::{self, Ast, JobPipeline, List, Node, Traversal};
use crate::common::ScopeGuard;
use crate::env::EnvStack;
use crate::expand::ExpandFlags;
@ -6,6 +6,7 @@ use crate::io::{IoBufferfill, IoChain};
use crate::parse_constants::{
ParseErrorCode, ParseTreeFlags, ParserTestErrorBits, StatementDecoration,
};
use crate::parse_tree::{parse_source, LineCounter};
use crate::parse_util::{parse_util_detect_errors, parse_util_detect_errors_in_argument};
use crate::parser::{CancelBehavior, Parser};
use crate::reader::{reader_pop, reader_push, reader_reset_interrupted, ReaderConfig};
@ -747,3 +748,74 @@ fn test_cancellation() {
reader_reset_interrupted();
signal_clear_cancel();
}
/// Exercise `LineCounter` over a three-line source: first the raw offset-to-line
/// mapping (forwards, then backwards, to hit both cache directions), then the
/// node-based accessors across every job pipeline in the AST.
#[test]
fn test_line_counter() {
    let src = L!("echo line1; echo still_line_1;\n\necho line3");
    let ps = parse_source(src.to_owned(), ParseTreeFlags::default(), None)
        .expect("Failed to parse source");
    assert!(!ps.ast.errored());
    let mut line_counter = ps.line_counter();

    // The expected line of an offset is the number of newlines strictly before it.
    let chars: Vec<char> = src.chars().collect();
    let newlines_before = |idx: usize| chars[..idx].iter().filter(|&&c| c == '\n').count();

    // Walk forwards, then backwards, so the cache is advanced and then rewound.
    for idx in 0..chars.len() {
        let line_offset = line_counter.line_offset_of_character_at_offset(idx);
        assert_eq!(line_offset, newlines_before(idx));
    }
    for idx in (0..chars.len()).rev() {
        let line_offset = line_counter.line_offset_of_character_at_offset(idx);
        assert_eq!(line_offset, newlines_before(idx));
    }

    // Two optional references are "the same" when both are absent or both point
    // at the same address.
    fn same_node<T>(a: Option<&T>, b: Option<&T>) -> bool {
        a.map(|r| r as *const T) == b.map(|r| r as *const T)
    }

    let pipelines: Vec<_> = ps.ast.walk().filter_map(|n| n.as_job_pipeline()).collect();
    assert_eq!(pipelines.len(), 3);
    // Expected 0-based line of each pipeline, in source order.
    let expected_lines = [0, 0, 2];

    // Before any node is set, both node accessors report nothing.
    assert_eq!(line_counter.source_offset_of_node(), None);
    assert_eq!(line_counter.line_offset_of_node(), None);

    // Visit every pipeline in order and then again in reverse order; each
    // `set_node` call must hand back whichever node was installed previously.
    let mut last_set = None;
    let forward = pipelines.iter().enumerate();
    let backward = pipelines.iter().enumerate().rev();
    for (idx, &node) in forward.chain(backward) {
        let orig = line_counter.set_node(Some(node));
        assert!(same_node(orig, last_set));
        last_set = Some(node);
        assert_eq!(
            line_counter.source_offset_of_node(),
            Some(node.source_range().start())
        );
        assert_eq!(
            line_counter.line_offset_of_node(),
            Some(expected_lines[idx])
        );
    }
}
/// A counter over empty source reports line 0 for offset 0 and has no current node.
#[test]
fn test_line_counter_empty() {
    let mut counter: LineCounter<JobPipeline> = LineCounter::empty();
    let line_at_start = counter.line_offset_of_character_at_offset(0);
    assert_eq!(line_at_start, 0);
    // No node has been set, so both node-based queries yield nothing.
    assert!(counter.line_offset_of_node().is_none());
    assert!(counter.source_offset_of_node().is_none());
}