mirror of
https://github.com/fish-shell/fish-shell
synced 2024-11-10 15:14:44 +00:00
Port parse_util_compute_indents
This commit is contained in:
parent
c25cc8df5d
commit
09ffac5a0a
5 changed files with 442 additions and 481 deletions
|
@ -58,7 +58,6 @@ include_cpp! {
|
|||
generate!("get_flog_file_fd")
|
||||
generate!("log_extra_to_flog_file")
|
||||
|
||||
generate!("indent_visitor_t")
|
||||
generate!("fish_wcwidth")
|
||||
generate!("fish_wcswidth")
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
//! Various mostly unrelated utility functions related to parsing, loading and evaluating fish code.
|
||||
use crate::ast::{self, Ast, Keyword, Leaf, List, Node, NodeFfi, NodeVisitor};
|
||||
use crate::ast::{self, Ast, Keyword, Leaf, List, Node, NodeVisitor};
|
||||
use crate::common::{
|
||||
escape_string, unescape_string, valid_var_name, valid_var_name_char, EscapeFlags,
|
||||
EscapeStringStyle, UnescapeFlags, UnescapeStringStyle,
|
||||
|
@ -9,17 +9,18 @@ use crate::expand::{
|
|||
BRACE_SEP, INTERNAL_SEPARATOR, VARIABLE_EXPAND, VARIABLE_EXPAND_EMPTY, VARIABLE_EXPAND_SINGLE,
|
||||
};
|
||||
use crate::ffi;
|
||||
use crate::ffi::indent_visitor_t;
|
||||
use crate::ffi_tests::add_test;
|
||||
use crate::future_feature_flags::{feature_test, FeatureFlag};
|
||||
use crate::operation_context::OperationContext;
|
||||
use crate::parse_constants::{
|
||||
parse_error_offset_source_start, ParseError, ParseErrorCode, ParseErrorList, ParseKeyword,
|
||||
ParserTestErrorBits, PipelinePosition, StatementDecoration, ERROR_BAD_VAR_CHAR1,
|
||||
ERROR_BRACKETED_VARIABLE1, ERROR_BRACKETED_VARIABLE_QUOTED1, ERROR_NOT_ARGV_AT,
|
||||
ERROR_NOT_ARGV_COUNT, ERROR_NOT_ARGV_STAR, ERROR_NOT_PID, ERROR_NOT_STATUS, ERROR_NO_VAR_NAME,
|
||||
INVALID_BREAK_ERR_MSG, INVALID_CONTINUE_ERR_MSG, INVALID_PIPELINE_CMD_ERR_MSG,
|
||||
PARSER_TEST_ERROR, PARSER_TEST_INCOMPLETE, PARSE_FLAG_LEAVE_UNTERMINATED, PARSE_FLAG_NONE,
|
||||
ParseTokenType, ParserTestErrorBits, PipelinePosition, StatementDecoration,
|
||||
ERROR_BAD_VAR_CHAR1, ERROR_BRACKETED_VARIABLE1, ERROR_BRACKETED_VARIABLE_QUOTED1,
|
||||
ERROR_NOT_ARGV_AT, ERROR_NOT_ARGV_COUNT, ERROR_NOT_ARGV_STAR, ERROR_NOT_PID, ERROR_NOT_STATUS,
|
||||
ERROR_NO_VAR_NAME, INVALID_BREAK_ERR_MSG, INVALID_CONTINUE_ERR_MSG,
|
||||
INVALID_PIPELINE_CMD_ERR_MSG, PARSER_TEST_ERROR, PARSER_TEST_INCOMPLETE,
|
||||
PARSE_FLAG_ACCEPT_INCOMPLETE_TOKENS, PARSE_FLAG_CONTINUE_AFTER_ERROR,
|
||||
PARSE_FLAG_INCLUDE_COMMENTS, PARSE_FLAG_LEAVE_UNTERMINATED, PARSE_FLAG_NONE,
|
||||
UNKNOWN_BUILTIN_ERR_MSG,
|
||||
};
|
||||
use crate::tokenizer::{
|
||||
|
@ -27,12 +28,12 @@ use crate::tokenizer::{
|
|||
TOK_SHOW_COMMENTS,
|
||||
};
|
||||
use crate::wchar::{wstr, WString, L};
|
||||
use crate::wchar_ffi::WCharToFFI;
|
||||
use crate::wchar_ffi::{WCharFromFFI, WCharToFFI};
|
||||
use crate::wcstringutil::truncate;
|
||||
use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE};
|
||||
use crate::wutil::{wgettext, wgettext_fmt};
|
||||
use cxx::CxxWString;
|
||||
use std::ops;
|
||||
use std::pin::Pin;
|
||||
use widestring_suffix::widestrs;
|
||||
|
||||
/// Handles slices: the square brackets in an expression like $foo[5..4]
|
||||
|
@ -723,48 +724,243 @@ pub fn parse_util_escape_string_with_quote(
|
|||
result
|
||||
}
|
||||
|
||||
/// Given a string, parse it as fish code and then return the indents. The return value has the same
|
||||
/// size as the string.
|
||||
pub fn parse_util_compute_indents(src: &wstr) -> Vec<i32> {
|
||||
// Make a vector the same size as the input string, which contains the indents. Initialize them
|
||||
// to 0.
|
||||
let mut indents = vec![0; src.len()];
|
||||
|
||||
// Simple trick: if our source does not contain a newline, then all indents are 0.
|
||||
if !src.chars().any(|c| c == '\n') {
|
||||
return indents;
|
||||
}
|
||||
|
||||
// Parse the string. We pass continue_after_error to produce a forest; the trailing indent of
|
||||
// the last node we visited becomes the input indent of the next. I.e. in the case of 'switch
|
||||
// foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
|
||||
// were a case item list.
|
||||
let ast = Ast::parse(
|
||||
src,
|
||||
PARSE_FLAG_CONTINUE_AFTER_ERROR
|
||||
| PARSE_FLAG_INCLUDE_COMMENTS
|
||||
| PARSE_FLAG_ACCEPT_INCOMPLETE_TOKENS
|
||||
| PARSE_FLAG_LEAVE_UNTERMINATED,
|
||||
None,
|
||||
);
|
||||
{
|
||||
let mut iv = IndentVisitor::new(src, &mut indents);
|
||||
iv.visit(ast.top());
|
||||
iv.record_line_continuations_until(iv.indents.len());
|
||||
iv.indents[iv.last_leaf_end..].fill(iv.last_indent);
|
||||
|
||||
// All newlines now get the *next* indent.
|
||||
// For example, in this code:
|
||||
// if true
|
||||
// stuff
|
||||
// the newline "belongs" to the if statement as it ends its job.
|
||||
// But when rendered, it visually belongs to the job list.
|
||||
|
||||
let mut idx = src.len();
|
||||
let mut next_indent = iv.last_indent;
|
||||
let src = src.as_char_slice();
|
||||
while idx != 0 {
|
||||
idx -= 1;
|
||||
if src[idx] == '\n' {
|
||||
let empty_middle_line = src.get(idx + 1) == Some(&'\n');
|
||||
if !empty_middle_line {
|
||||
iv.indents[idx] = next_indent;
|
||||
}
|
||||
} else {
|
||||
next_indent = iv.indents[idx];
|
||||
}
|
||||
}
|
||||
// Add an extra level of indentation to continuation lines.
|
||||
for mut idx in iv.line_continuations {
|
||||
loop {
|
||||
indents[idx] = indents[idx].wrapping_add(1);
|
||||
idx += 1;
|
||||
if idx == src.len() || src[idx] == '\n' {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
indents
|
||||
}
|
||||
|
||||
// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while
|
||||
// visiting its children.
|
||||
struct IndentVisitor<'a> {
|
||||
companion: Pin<&'a mut indent_visitor_t>,
|
||||
// companion: Pin<&'a mut indent_visitor_t>,
|
||||
// The one-past-the-last index of the most recently encountered leaf node.
|
||||
// We use this to populate the indents even if there's no tokens in the range.
|
||||
last_leaf_end: usize,
|
||||
|
||||
// The last indent which we assigned.
|
||||
last_indent: i32,
|
||||
|
||||
// The source we are indenting.
|
||||
src: &'a wstr,
|
||||
|
||||
// List of indents, which we populate.
|
||||
indents: &'a mut Vec<i32>,
|
||||
|
||||
// Initialize our starting indent to -1, as our top-level node is a job list which
|
||||
// will immediately increment it.
|
||||
indent: i32,
|
||||
|
||||
// List of locations of escaped newline characters.
|
||||
line_continuations: Vec<usize>,
|
||||
}
|
||||
impl<'a> IndentVisitor<'a> {
|
||||
fn new(src: &'a wstr, indents: &'a mut Vec<i32>) -> Self {
|
||||
Self {
|
||||
last_leaf_end: 0,
|
||||
last_indent: -1,
|
||||
src,
|
||||
indents,
|
||||
indent: -1,
|
||||
line_continuations: vec![],
|
||||
}
|
||||
}
|
||||
/// \return whether a maybe_newlines node contains at least one newline.
|
||||
fn has_newline(&self, nls: &ast::MaybeNewlines) -> bool {
|
||||
nls.source(self.src).chars().any(|c| c == '\n')
|
||||
}
|
||||
fn record_line_continuations_until(&mut self, offset: usize) {
|
||||
let gap_text = &self.src[self.last_leaf_end..offset];
|
||||
let gap_text = gap_text.as_char_slice();
|
||||
let Some(escaped_nl) = gap_text.windows(2).position(|w| *w == ['\\', '\n']) else {
|
||||
return;
|
||||
};
|
||||
if gap_text[..escaped_nl].contains(&'#') {
|
||||
return;
|
||||
}
|
||||
let mut newline = escaped_nl + 1;
|
||||
// The gap text might contain multiple newlines if there are multiple lines that
|
||||
// don't contain an AST node, for example, comment lines, or lines containing only
|
||||
// the escaped newline.
|
||||
loop {
|
||||
self.line_continuations.push(self.last_leaf_end + newline);
|
||||
match gap_text[newline + 1..].iter().position(|c| *c == '\n') {
|
||||
Some(nextnl) => newline = newline + 1 + nextnl,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> NodeVisitor<'a> for IndentVisitor<'a> {
|
||||
// Default implementation is to just visit children.
|
||||
fn visit(&mut self, node: &'a dyn Node) {
|
||||
let ffi_node = NodeFfi::new(node);
|
||||
let dec = self
|
||||
.companion
|
||||
.as_mut()
|
||||
.visit((&ffi_node as *const NodeFfi<'_>).cast());
|
||||
let mut inc = 0;
|
||||
let mut dec = 0;
|
||||
use ast::{Category, Type};
|
||||
match node.typ() {
|
||||
Type::job_list | Type::andor_job_list => {
|
||||
// Job lists are never unwound.
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
}
|
||||
|
||||
// Increment indents for conditions in headers (#1665).
|
||||
Type::job_conjunction => {
|
||||
if [Type::while_header, Type::if_clause].contains(&node.parent().unwrap().typ()) {
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Increment indents for job_continuation_t if it contains a newline.
|
||||
// This is a bit of a hack - it indents cases like:
|
||||
// cmd1 |
|
||||
// ....cmd2
|
||||
// but avoids "double indenting" if there's no newline:
|
||||
// cmd1 | while cmd2
|
||||
// ....cmd3
|
||||
// end
|
||||
// See #7252.
|
||||
Type::job_continuation => {
|
||||
if self.has_newline(&node.as_job_continuation().unwrap().newlines) {
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Likewise for && and ||.
|
||||
Type::job_conjunction_continuation => {
|
||||
if self.has_newline(&node.as_job_conjunction_continuation().unwrap().newlines) {
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
}
|
||||
}
|
||||
|
||||
Type::case_item_list => {
|
||||
// Here's a hack. Consider:
|
||||
// switch abc
|
||||
// cas
|
||||
//
|
||||
// fish will see that 'cas' is not valid inside a switch statement because it is
|
||||
// not "case". It will then unwind back to the top level job list, producing a
|
||||
// parse tree like:
|
||||
//
|
||||
// job_list
|
||||
// switch_job
|
||||
// <err>
|
||||
// normal_job
|
||||
// cas
|
||||
//
|
||||
// And so we will think that the 'cas' job is at the same level as the switch.
|
||||
// To address this, if we see that the switch statement was not closed, do not
|
||||
// decrement the indent afterwards.
|
||||
inc = 1;
|
||||
let switchs = node.parent().unwrap().as_switch_statement().unwrap();
|
||||
dec = if switchs.end.has_source() { 1 } else { 0 };
|
||||
}
|
||||
Type::token_base => {
|
||||
if node.parent().unwrap().typ() == Type::begin_header
|
||||
&& node.as_token().unwrap().token_type() == ParseTokenType::end
|
||||
{
|
||||
// The newline after "begin" is optional, so it is part of the header.
|
||||
// The header is not in the indented block, so indent the newline here.
|
||||
if node.source(self.src) == L!("\n") {
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let range = node.source_range();
|
||||
if range.length() > 0 && node.category() == Category::leaf {
|
||||
self.record_line_continuations_until(range.start());
|
||||
self.indents[self.last_leaf_end..range.start()].fill(self.last_indent);
|
||||
}
|
||||
|
||||
self.indent += inc;
|
||||
|
||||
// If we increased the indentation, apply it to the remainder of the string, even if the
|
||||
// list is empty. For example (where _ represents the cursor):
|
||||
//
|
||||
// if foo
|
||||
// _
|
||||
//
|
||||
// we want to indent the newline.
|
||||
if inc != 0 {
|
||||
self.last_indent = self.indent;
|
||||
}
|
||||
|
||||
// If this is a leaf node, apply the current indentation.
|
||||
if node.category() == Category::leaf && range.length() != 0 {
|
||||
self.indents[range.start()..range.end()].fill(self.indent);
|
||||
self.last_leaf_end = range.end();
|
||||
self.last_indent = self.indent;
|
||||
}
|
||||
|
||||
node.accept(self, false);
|
||||
self.companion.as_mut().did_visit(dec);
|
||||
}
|
||||
}
|
||||
|
||||
#[cxx::bridge]
|
||||
#[allow(clippy::needless_lifetimes)] // false positive
|
||||
mod parse_util_ffi {
|
||||
extern "C++" {
|
||||
include!("ast.h");
|
||||
include!("parse_util.h");
|
||||
type indent_visitor_t = crate::ffi::indent_visitor_t;
|
||||
type Ast = crate::ast::Ast;
|
||||
type NodeFfi<'a> = crate::ast::NodeFfi<'a>;
|
||||
}
|
||||
extern "Rust" {
|
||||
type IndentVisitor<'a>;
|
||||
unsafe fn new_indent_visitor(
|
||||
companion: Pin<&mut indent_visitor_t>,
|
||||
) -> Box<IndentVisitor<'_>>;
|
||||
#[cxx_name = "visit"]
|
||||
unsafe fn visit_ffi<'a>(self: &mut IndentVisitor<'a>, node: &'a NodeFfi<'a>);
|
||||
}
|
||||
}
|
||||
|
||||
fn new_indent_visitor(companion: Pin<&mut indent_visitor_t>) -> Box<IndentVisitor<'_>> {
|
||||
Box::new(IndentVisitor { companion })
|
||||
}
|
||||
impl<'a> IndentVisitor<'a> {
|
||||
fn visit_ffi(self: &mut IndentVisitor<'a>, node: &'a NodeFfi<'a>) {
|
||||
self.visit(node.as_node());
|
||||
self.indent -= dec;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1577,3 +1773,200 @@ add_test!("test_escape_quotes", || {
|
|||
validate!("foo\nba'r", Some('"'), false, "foo\"\\n\"ba'r");
|
||||
validate!("foo\\\\bar", Some('"'), false, "foo\\\\\\\\bar");
|
||||
});
|
||||
|
||||
add_test!("test_indents", || {
|
||||
// A struct which is either text or a new indent.
|
||||
struct Segment {
|
||||
// The indent to set
|
||||
indent: i32,
|
||||
text: &'static str,
|
||||
}
|
||||
fn do_validate(segments: &[Segment]) {
|
||||
// Compute the indents.
|
||||
let mut expected_indents = vec![];
|
||||
let mut text = WString::new();
|
||||
for segment in segments {
|
||||
text.push_str(segment.text);
|
||||
for _ in segment.text.chars() {
|
||||
expected_indents.push(segment.indent);
|
||||
}
|
||||
}
|
||||
let indents = parse_util_compute_indents(&text);
|
||||
assert_eq!(indents, expected_indents);
|
||||
}
|
||||
macro_rules! validate {
|
||||
( $( $(,)? $indent:literal, $text:literal )* ) => {
|
||||
let segments = vec![
|
||||
$(
|
||||
Segment{ indent: $indent, text: $text },
|
||||
)*
|
||||
];
|
||||
do_validate(&segments);
|
||||
};
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
#[allow(clippy::redundant_closure_call)]
|
||||
(|| {
|
||||
validate!(
|
||||
0, "if", 1, " foo",
|
||||
0, "\nend"
|
||||
);
|
||||
validate!(
|
||||
0, "if", 1, " foo",
|
||||
1, "\nfoo",
|
||||
0, "\nend"
|
||||
);
|
||||
|
||||
validate!(
|
||||
0, "if", 1, " foo",
|
||||
1, "\nif", 2, " bar",
|
||||
1, "\nend",
|
||||
0, "\nend"
|
||||
);
|
||||
|
||||
validate!(
|
||||
0, "if", 1, " foo",
|
||||
1, "\nif", 2, " bar",
|
||||
2, "\n",
|
||||
1, "\nend\n"
|
||||
);
|
||||
|
||||
validate!(
|
||||
0, "if", 1, " foo",
|
||||
1, "\nif", 2, " bar",
|
||||
2, "\n"
|
||||
);
|
||||
|
||||
validate!(
|
||||
0, "begin",
|
||||
1, "\nfoo",
|
||||
1, "\n"
|
||||
);
|
||||
|
||||
validate!(
|
||||
0, "begin",
|
||||
1, "\n;",
|
||||
0, "end",
|
||||
0, "\nfoo", 0, "\n"
|
||||
);
|
||||
|
||||
validate!(
|
||||
0, "begin",
|
||||
1, "\n;",
|
||||
0, "end",
|
||||
0, "\nfoo", 0, "\n"
|
||||
);
|
||||
|
||||
validate!(
|
||||
0, "if", 1, " foo",
|
||||
1, "\nif", 2, " bar",
|
||||
2, "\nbaz",
|
||||
1, "\nend", 1, "\n"
|
||||
);
|
||||
|
||||
validate!(
|
||||
0, "switch foo",
|
||||
1, "\n"
|
||||
);
|
||||
|
||||
validate!(
|
||||
0, "switch foo",
|
||||
1, "\ncase bar",
|
||||
1, "\ncase baz",
|
||||
2, "\nquux",
|
||||
2, "\nquux"
|
||||
);
|
||||
|
||||
validate!(
|
||||
0,
|
||||
"switch foo",
|
||||
1,
|
||||
"\ncas" // parse error indentation handling
|
||||
);
|
||||
|
||||
validate!(
|
||||
0, "while",
|
||||
1, " false",
|
||||
1, "\n# comment", // comment indentation handling
|
||||
1, "\ncommand",
|
||||
1, "\n# comment 2"
|
||||
);
|
||||
|
||||
validate!(
|
||||
0, "begin",
|
||||
1, "\n", // "begin" is special because this newline belongs to the block header
|
||||
1, "\n"
|
||||
);
|
||||
|
||||
// Continuation lines.
|
||||
validate!(
|
||||
0, "echo 'continuation line' \\",
|
||||
1, "\ncont",
|
||||
0, "\n"
|
||||
);
|
||||
validate!(
|
||||
0, "echo 'empty continuation line' \\",
|
||||
1, "\n"
|
||||
);
|
||||
validate!(
|
||||
0, "begin # continuation line in block",
|
||||
1, "\necho \\",
|
||||
2, "\ncont"
|
||||
);
|
||||
validate!(
|
||||
0, "begin # empty continuation line in block",
|
||||
1, "\necho \\",
|
||||
2, "\n",
|
||||
0, "\nend"
|
||||
);
|
||||
validate!(
|
||||
0, "echo 'multiple continuation lines' \\",
|
||||
1, "\nline1 \\",
|
||||
1, "\n# comment",
|
||||
1, "\n# more comment",
|
||||
1, "\nline2 \\",
|
||||
1, "\n"
|
||||
);
|
||||
validate!(
|
||||
0, "echo # inline comment ending in \\",
|
||||
0, "\nline"
|
||||
);
|
||||
validate!(
|
||||
0, "# line comment ending in \\",
|
||||
0, "\nline"
|
||||
);
|
||||
validate!(
|
||||
0, "echo 'multiple empty continuation lines' \\",
|
||||
1, "\n\\",
|
||||
1, "\n",
|
||||
0, "\n"
|
||||
);
|
||||
validate!(
|
||||
0, "echo 'multiple statements with continuation lines' \\",
|
||||
1, "\nline 1",
|
||||
0, "\necho \\",
|
||||
1, "\n"
|
||||
);
|
||||
// This is an edge case, probably okay to change the behavior here.
|
||||
validate!(
|
||||
0, "begin",
|
||||
1, " \\",
|
||||
2, "\necho 'continuation line in block header' \\",
|
||||
2, "\n",
|
||||
1, "\n",
|
||||
0, "\nend"
|
||||
);
|
||||
})();
|
||||
});
|
||||
|
||||
#[cxx::bridge]
|
||||
mod parse_util_ffi {
|
||||
extern "Rust" {
|
||||
fn parse_util_compute_indents_ffi(src: &CxxWString) -> Vec<i32>;
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_util_compute_indents_ffi(src: &CxxWString) -> Vec<i32> {
|
||||
parse_util_compute_indents(&src.from_ffi())
|
||||
}
|
||||
|
|
|
@ -1190,208 +1190,6 @@ static void test_cancellation() {
|
|||
signal_clear_cancel();
|
||||
}
|
||||
|
||||
namespace indent_tests {
|
||||
// A struct which is either text or a new indent.
|
||||
struct segment_t {
|
||||
// The indent to set
|
||||
int indent{0};
|
||||
const char *text{nullptr};
|
||||
|
||||
/* implicit */ segment_t(int indent) : indent(indent) {}
|
||||
/* implicit */ segment_t(const char *text) : text(text) {}
|
||||
};
|
||||
|
||||
using indent_test_t = std::vector<segment_t>;
|
||||
using indent_test_list_t = std::vector<indent_test_t>;
|
||||
|
||||
// Add a new test to a test list based on a series of ints and texts.
|
||||
template <typename... Types>
|
||||
void add_test(indent_test_list_t *v, const Types &...types) {
|
||||
segment_t segments[] = {types...};
|
||||
v->emplace_back(std::begin(segments), std::end(segments));
|
||||
}
|
||||
} // namespace indent_tests
|
||||
|
||||
static void test_indents() {
|
||||
say(L"Testing indents");
|
||||
using namespace indent_tests;
|
||||
|
||||
indent_test_list_t tests;
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
0, "\nend");
|
||||
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
1, "\nfoo", //
|
||||
0, "\nend");
|
||||
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
1, "\nif", 2, " bar", //
|
||||
1, "\nend", //
|
||||
0, "\nend");
|
||||
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
1, "\nif", 2, " bar", //
|
||||
2, "\n", //
|
||||
1, "\nend\n");
|
||||
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
1, "\nif", 2, " bar", //
|
||||
2, "\n");
|
||||
|
||||
add_test(&tests, //
|
||||
0, "begin", //
|
||||
1, "\nfoo", //
|
||||
1, "\n");
|
||||
|
||||
add_test(&tests, //
|
||||
0, "begin", //
|
||||
1, "\n;", //
|
||||
0, "end", //
|
||||
0, "\nfoo", 0, "\n");
|
||||
|
||||
add_test(&tests, //
|
||||
0, "begin", //
|
||||
1, "\n;", //
|
||||
0, "end", //
|
||||
0, "\nfoo", 0, "\n");
|
||||
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
1, "\nif", 2, " bar", //
|
||||
2, "\nbaz", //
|
||||
1, "\nend", 1, "\n");
|
||||
|
||||
add_test(&tests, //
|
||||
0, "switch foo", //
|
||||
1, "\n" //
|
||||
);
|
||||
|
||||
add_test(&tests, //
|
||||
0, "switch foo", //
|
||||
1, "\ncase bar", //
|
||||
1, "\ncase baz", //
|
||||
2, "\nquux", //
|
||||
2, "\nquux" //
|
||||
);
|
||||
|
||||
add_test(&tests, //
|
||||
0, "switch foo", //
|
||||
1, "\ncas" // parse error indentation handling
|
||||
);
|
||||
|
||||
add_test(&tests, //
|
||||
0, "while", 1, " false", //
|
||||
1, "\n# comment", // comment indentation handling
|
||||
1, "\ncommand", //
|
||||
1, "\n# comment 2" //
|
||||
);
|
||||
|
||||
add_test(&tests, //
|
||||
0, "begin", //
|
||||
1, "\n", // "begin" is special because this newline belongs to the block header
|
||||
1, "\n" //
|
||||
);
|
||||
|
||||
// Continuation lines.
|
||||
add_test(&tests, //
|
||||
0, "echo 'continuation line' \\", //
|
||||
1, "\ncont", //
|
||||
0, "\n" //
|
||||
);
|
||||
add_test(&tests, //
|
||||
0, "echo 'empty continuation line' \\", //
|
||||
1, "\n" //
|
||||
);
|
||||
add_test(&tests, //
|
||||
0, "begin # continuation line in block", //
|
||||
1, "\necho \\", //
|
||||
2, "\ncont" //
|
||||
);
|
||||
add_test(&tests, //
|
||||
0, "begin # empty continuation line in block", //
|
||||
1, "\necho \\", //
|
||||
2, "\n", //
|
||||
0, "\nend" //
|
||||
);
|
||||
add_test(&tests, //
|
||||
0, "echo 'multiple continuation lines' \\", //
|
||||
1, "\nline1 \\", //
|
||||
1, "\n# comment", //
|
||||
1, "\n# more comment", //
|
||||
1, "\nline2 \\", //
|
||||
1, "\n" //
|
||||
);
|
||||
add_test(&tests, //
|
||||
0, "echo # inline comment ending in \\", //
|
||||
0, "\nline" //
|
||||
);
|
||||
add_test(&tests, //
|
||||
0, "# line comment ending in \\", //
|
||||
0, "\nline" //
|
||||
);
|
||||
add_test(&tests, //
|
||||
0, "echo 'multiple empty continuation lines' \\", //
|
||||
1, "\n\\", //
|
||||
1, "\n", //
|
||||
0, "\n" //
|
||||
);
|
||||
add_test(&tests, //
|
||||
0, "echo 'multiple statements with continuation lines' \\", //
|
||||
1, "\nline 1", //
|
||||
0, "\necho \\", //
|
||||
1, "\n" //
|
||||
);
|
||||
// This is an edge case, probably okay to change the behavior here.
|
||||
add_test(&tests, //
|
||||
0, "begin", 1, " \\", //
|
||||
2, "\necho 'continuation line in block header' \\", //
|
||||
2, "\n", //
|
||||
1, "\n", //
|
||||
0, "\nend" //
|
||||
);
|
||||
|
||||
int test_idx = 0;
|
||||
for (const indent_test_t &test : tests) {
|
||||
// Construct the input text and expected indents.
|
||||
wcstring text;
|
||||
std::vector<int> expected_indents;
|
||||
int current_indent = 0;
|
||||
for (const segment_t &segment : test) {
|
||||
if (!segment.text) {
|
||||
current_indent = segment.indent;
|
||||
} else {
|
||||
wcstring tmp = str2wcstring(segment.text);
|
||||
text.append(tmp);
|
||||
expected_indents.insert(expected_indents.end(), tmp.size(), current_indent);
|
||||
}
|
||||
}
|
||||
do_test(expected_indents.size() == text.size());
|
||||
|
||||
// Compute the indents.
|
||||
std::vector<int> indents = parse_util_compute_indents(text);
|
||||
|
||||
if (expected_indents.size() != indents.size()) {
|
||||
err(L"Indent vector has wrong size! Expected %lu, actual %lu", expected_indents.size(),
|
||||
indents.size());
|
||||
}
|
||||
do_test(expected_indents.size() == indents.size());
|
||||
for (size_t i = 0; i < text.size(); i++) {
|
||||
if (expected_indents.at(i) != indents.at(i)) {
|
||||
err(L"Wrong indent at index %lu (char 0x%02x) in test #%lu (expected %d, actual "
|
||||
L"%d):\n%ls\n",
|
||||
i, text.at(i), test_idx, expected_indents.at(i), indents.at(i), text.c_str());
|
||||
break; // don't keep showing errors for the rest of the test
|
||||
}
|
||||
}
|
||||
test_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
static void test_const_strlen() {
|
||||
do_test(const_strlen("") == 0);
|
||||
do_test(const_strlen(L"") == 0);
|
||||
|
@ -1465,7 +1263,7 @@ void test_dir_iter() {
|
|||
const wcstring selflinkname = L"selflink"; // link to self
|
||||
const wcstring fifoname = L"fifo";
|
||||
const std::vector<wcstring> names = {dirname, regname, reglinkname, dirlinkname,
|
||||
badlinkname, selflinkname, fifoname};
|
||||
badlinkname, selflinkname, fifoname};
|
||||
|
||||
const auto is_link_name = [&](const wcstring &name) -> bool {
|
||||
return contains({reglinkname, dirlinkname, badlinkname, selflinkname}, name);
|
||||
|
@ -3988,7 +3786,7 @@ void history_tests_t::test_history() {
|
|||
say(L"Testing history");
|
||||
|
||||
const std::vector<wcstring> items = {L"Gamma", L"beta", L"BetA", L"Beta", L"alpha",
|
||||
L"AlphA", L"Alpha", L"alph", L"ALPH", L"ZZZ"};
|
||||
L"AlphA", L"Alpha", L"alph", L"ALPH", L"ZZZ"};
|
||||
const history_search_flags_t nocase = history_search_ignore_case;
|
||||
|
||||
// Populate a history.
|
||||
|
@ -6625,7 +6423,6 @@ static const test_t s_tests[]{
|
|||
{TEST_GROUP("debounce"), test_debounce_timeout},
|
||||
{TEST_GROUP("parser"), test_parser},
|
||||
{TEST_GROUP("cancellation"), test_cancellation},
|
||||
{TEST_GROUP("indents"), test_indents},
|
||||
{TEST_GROUP("utf8"), test_utf8},
|
||||
{TEST_GROUP("escape_sequences"), test_escape_sequences},
|
||||
{TEST_GROUP("lru"), test_lru},
|
||||
|
|
|
@ -593,196 +593,9 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote,
|
|||
return result;
|
||||
}
|
||||
|
||||
indent_visitor_t::indent_visitor_t(const wcstring &src, std::vector<int> &indents)
|
||||
: src(src), indents(indents), visitor(new_indent_visitor(*this)) {}
|
||||
|
||||
bool indent_visitor_t::has_newline(const ast::maybe_newlines_t &nls) const {
|
||||
return nls.ptr()->source(src)->find(L'\n') != wcstring::npos;
|
||||
}
|
||||
|
||||
int indent_visitor_t::visit(const void *node_) {
|
||||
auto &node = *static_cast<const ast::node_t *>(node_);
|
||||
int inc = 0;
|
||||
int dec = 0;
|
||||
using namespace ast;
|
||||
switch (node.typ()) {
|
||||
case type_t::job_list:
|
||||
case type_t::andor_job_list:
|
||||
// Job lists are never unwound.
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
break;
|
||||
|
||||
// Increment indents for conditions in headers (#1665).
|
||||
case type_t::job_conjunction:
|
||||
if (node.parent()->typ() == type_t::while_header ||
|
||||
node.parent()->typ() == type_t::if_clause) {
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
// Increment indents for job_continuation_t if it contains a newline.
|
||||
// This is a bit of a hack - it indents cases like:
|
||||
// cmd1 |
|
||||
// ....cmd2
|
||||
// but avoids "double indenting" if there's no newline:
|
||||
// cmd1 | while cmd2
|
||||
// ....cmd3
|
||||
// end
|
||||
// See #7252.
|
||||
case type_t::job_continuation:
|
||||
if (has_newline(node.as_job_continuation().newlines())) {
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
// Likewise for && and ||.
|
||||
case type_t::job_conjunction_continuation:
|
||||
if (has_newline(node.as_job_conjunction_continuation().newlines())) {
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case type_t::case_item_list:
|
||||
// Here's a hack. Consider:
|
||||
// switch abc
|
||||
// cas
|
||||
//
|
||||
// fish will see that 'cas' is not valid inside a switch statement because it is
|
||||
// not "case". It will then unwind back to the top level job list, producing a
|
||||
// parse tree like:
|
||||
//
|
||||
// job_list
|
||||
// switch_job
|
||||
// <err>
|
||||
// normal_job
|
||||
// cas
|
||||
//
|
||||
// And so we will think that the 'cas' job is at the same level as the switch.
|
||||
// To address this, if we see that the switch statement was not closed, do not
|
||||
// decrement the indent afterwards.
|
||||
inc = 1;
|
||||
dec = node.parent()->as_switch_statement().end().ptr()->has_source() ? 1 : 0;
|
||||
break;
|
||||
case type_t::token_base: {
|
||||
if (node.parent()->typ() == type_t::begin_header &&
|
||||
node.token_type() == parse_token_type_t::end) {
|
||||
// The newline after "begin" is optional, so it is part of the header.
|
||||
// The header is not in the indented block, so indent the newline here.
|
||||
if (*node.source(src) == L"\n") {
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
auto range = node.source_range();
|
||||
if (range.length > 0 && node.category() == category_t::leaf) {
|
||||
record_line_continuations_until(range.start);
|
||||
std::fill(indents.begin() + last_leaf_end, indents.begin() + range.start, last_indent);
|
||||
}
|
||||
|
||||
indent += inc;
|
||||
|
||||
// If we increased the indentation, apply it to the remainder of the string, even if the
|
||||
// list is empty. For example (where _ represents the cursor):
|
||||
//
|
||||
// if foo
|
||||
// _
|
||||
//
|
||||
// we want to indent the newline.
|
||||
if (inc) {
|
||||
last_indent = indent;
|
||||
}
|
||||
|
||||
// If this is a leaf node, apply the current indentation.
|
||||
if (node.category() == category_t::leaf && range.length > 0) {
|
||||
std::fill(indents.begin() + range.start, indents.begin() + range.end(), indent);
|
||||
last_leaf_end = range.start + range.length;
|
||||
last_indent = indent;
|
||||
}
|
||||
|
||||
return dec;
|
||||
}
|
||||
|
||||
void indent_visitor_t::did_visit(int dec) { indent -= dec; }
|
||||
|
||||
void indent_visitor_t::record_line_continuations_until(size_t offset) {
|
||||
wcstring gap_text = src.substr(last_leaf_end, offset - last_leaf_end);
|
||||
size_t escaped_nl = gap_text.find(L"\\\n");
|
||||
if (escaped_nl == wcstring::npos) return;
|
||||
auto line_end = gap_text.begin() + escaped_nl;
|
||||
if (std::find(gap_text.begin(), line_end, L'#') != line_end) return;
|
||||
auto end = src.begin() + offset;
|
||||
auto newline = src.begin() + last_leaf_end + escaped_nl + 1;
|
||||
// The gap text might contain multiple newlines if there are multiple lines that
|
||||
// don't contain an AST node, for example, comment lines, or lines containing only
|
||||
// the escaped newline.
|
||||
do {
|
||||
line_continuations.push_back(newline - src.begin());
|
||||
newline = std::find(newline + 1, end, L'\n');
|
||||
} while (newline != end);
|
||||
}
|
||||
|
||||
std::vector<int> parse_util_compute_indents(const wcstring &src) {
|
||||
// Make a vector the same size as the input string, which contains the indents. Initialize them
|
||||
// to 0.
|
||||
const size_t src_size = src.size();
|
||||
std::vector<int> indents(src_size, 0);
|
||||
|
||||
// Simple trick: if our source does not contain a newline, then all indents are 0.
|
||||
if (src.find('\n') == wcstring::npos) {
|
||||
return indents;
|
||||
}
|
||||
|
||||
// Parse the string. We pass continue_after_error to produce a forest; the trailing indent of
|
||||
// the last node we visited becomes the input indent of the next. I.e. in the case of 'switch
|
||||
// foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
|
||||
// were a case item list.
|
||||
using namespace ast;
|
||||
auto ast =
|
||||
ast_parse(src, parse_flag_continue_after_error | parse_flag_include_comments |
|
||||
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated);
|
||||
|
||||
indent_visitor_t iv(src, indents);
|
||||
iv.visitor->visit(*ast->top());
|
||||
iv.record_line_continuations_until(indents.size());
|
||||
std::fill(indents.begin() + iv.last_leaf_end, indents.end(), iv.last_indent);
|
||||
|
||||
// All newlines now get the *next* indent.
|
||||
// For example, in this code:
|
||||
// if true
|
||||
// stuff
|
||||
// the newline "belongs" to the if statement as it ends its job.
|
||||
// But when rendered, it visually belongs to the job list.
|
||||
|
||||
size_t idx = src_size;
|
||||
int next_indent = iv.last_indent;
|
||||
while (idx--) {
|
||||
if (src.at(idx) == L'\n') {
|
||||
bool empty_middle_line = idx + 1 < src_size && src.at(idx + 1) == L'\n';
|
||||
if (!empty_middle_line) {
|
||||
indents.at(idx) = next_indent;
|
||||
}
|
||||
} else {
|
||||
next_indent = indents.at(idx);
|
||||
}
|
||||
}
|
||||
// Add an extra level of indentation to continuation lines.
|
||||
for (size_t idx : iv.line_continuations) {
|
||||
do {
|
||||
indents.at(idx)++;
|
||||
} while (++idx < src_size && src.at(idx) != L'\n');
|
||||
}
|
||||
|
||||
return indents;
|
||||
auto indents = parse_util_compute_indents_ffi(src);
|
||||
return {indents.begin(), indents.end()};
|
||||
}
|
||||
|
||||
/// Append a syntax error to the given error list.
|
||||
|
|
|
@ -114,47 +114,6 @@ wchar_t parse_util_get_quote_type(const wcstring &cmd, size_t pos);
|
|||
wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote,
|
||||
bool no_tilde = false);
|
||||
|
||||
// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while
|
||||
// visiting its children.
|
||||
struct IndentVisitor;
|
||||
struct indent_visitor_t {
|
||||
indent_visitor_t(const wcstring &src, std::vector<int> &indents);
|
||||
indent_visitor_t(const indent_visitor_t &) = delete;
|
||||
indent_visitor_t &operator=(const indent_visitor_t &) = delete;
|
||||
|
||||
int visit(const void *node);
|
||||
void did_visit(int dec);
|
||||
|
||||
#if INCLUDE_RUST_HEADERS
|
||||
/// \return whether a maybe_newlines node contains at least one newline.
|
||||
bool has_newline(const ast::maybe_newlines_t &nls) const;
|
||||
|
||||
void record_line_continuations_until(size_t offset);
|
||||
|
||||
// The one-past-the-last index of the most recently encountered leaf node.
|
||||
// We use this to populate the indents even if there's no tokens in the range.
|
||||
size_t last_leaf_end{0};
|
||||
|
||||
// The last indent which we assigned.
|
||||
int last_indent{-1};
|
||||
|
||||
// The source we are indenting.
|
||||
const wcstring &src;
|
||||
|
||||
// List of indents, which we populate.
|
||||
std::vector<int> &indents;
|
||||
|
||||
// Initialize our starting indent to -1, as our top-level node is a job list which
|
||||
// will immediately increment it.
|
||||
int indent{-1};
|
||||
|
||||
// List of locations of escaped newline characters.
|
||||
std::vector<size_t> line_continuations;
|
||||
|
||||
rust::Box<IndentVisitor> visitor;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// Given a string, parse it as fish code and then return the indents. The return value has the same
|
||||
/// size as the string.
|
||||
std::vector<int> parse_util_compute_indents(const wcstring &src);
|
||||
|
|
Loading…
Reference in a new issue