Strip BOM from output in interactive mode (#1938)

* Strip BOM from output in interactive mode

* Strip BOM when not loop_through, add regression tests

* Update CHANGELOG.md

* Only strip BOM from beginning of first line

* Fix integration test on macOS that relied on color scheme

* Fix integration test on Windows that relied on detected terminal width

* Fix syntax test that was failing due to a previously wrong (now fixed) highlighting

Co-authored-by: David Peter <mail@david-peter.de>
Co-authored-by: Martin Nordholts <enselic@gmail.com>
This commit is contained in:
dag-h 2022-09-06 19:08:38 +02:00 committed by GitHub
parent 0e03dce130
commit 08386daa3a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 67 additions and 4 deletions

View file

@ -10,6 +10,7 @@
- Prevent fork nightmare with `PAGER=batcat`. See #2235 (@johnmatthiggins)
- Make `--no-paging`/`-P` override `--paging=...` if passed as a later arg, see #2201 (@themkat)
- `--map-syntax` and `--ignored-suffix` now work together, see #2093 (@czzrr)
- Strips byte order mark from output when in non-loop-through mode. See #1922 (@dag-h)
## Other

View file

@ -419,7 +419,7 @@ impl<'a> Printer for InteractivePrinter<'a> {
let line = if self.config.show_nonprintable {
replace_nonprintable(line_buffer, self.config.tab_width)
} else {
match self.content_type {
let line = match self.content_type {
Some(ContentType::BINARY) | None => {
return Ok(());
}
@ -430,6 +430,15 @@ impl<'a> Printer for InteractivePrinter<'a> {
.decode(line_buffer, DecoderTrap::Replace)
.map_err(|_| "Invalid UTF-16BE")?,
_ => String::from_utf8_lossy(line_buffer).to_string(),
};
// Remove byte order mark from the first line if it exists
if line_number == 1 {
match line.strip_prefix('\u{feff}') {
Some(stripped) => stripped.to_string(),
None => line,
}
} else {
line
}
};

1
tests/examples/test_BOM.txt vendored Normal file
View file

@ -0,0 +1 @@
hello world

View file

@ -758,14 +758,66 @@ fn config_read_arguments_from_file() {
#[test]
fn utf16() {
// The output will be converted to UTF-8 with a leading UTF-8 BOM
// The output will be converted to UTF-8 with the leading UTF-16
// BOM removed. This behavior is wanted in interactive mode as
// some terminals seem to display the BOM character as a space,
// and it also breaks syntax highlighting.
bat()
.arg("--plain")
.arg("--decorations=always")
.arg("test_UTF-16LE.txt")
.assert()
.success()
.stdout(std::str::from_utf8(b"\xEF\xBB\xBFhello world\n").unwrap());
.stdout("hello world\n");
}
// Regression test for https://github.com/sharkdp/bat/issues/1922
//
// With plain style and with decorations and color both disabled, the output
// is expected to still start with the byte order mark (U+FEFF) that the input
// file begins with, i.e. the BOM is passed through rather than stripped.
#[test]
fn bom_not_stripped_in_loop_through_mode() {
    // The leading BOM must survive untouched in front of the text.
    let expected = "\u{feff}hello world\n";

    let mut cmd = bat();
    cmd.arg("--plain")
        .arg("--decorations=never")
        .arg("--color=never")
        .arg("test_BOM.txt");

    cmd.assert().success().stdout(expected);
}
// Regression test for https://github.com/sharkdp/bat/issues/1922
//
// With color forced on, the text must directly follow the opening color
// escape sequence, with no byte order mark in between.
#[test]
fn bom_stripped_when_colored_output() {
    let mut cmd = bat();
    cmd.arg("--color=always")
        .arg("--decorations=never")
        .arg("test_BOM.txt");

    let run = cmd.assert().success();

    // The pattern accepts any three-digit 256-color foreground code, so the
    // assertion does not depend on which color a theme assigns to the text.
    let colored_line =
        predicate::str::is_match("\u{1b}\\[38;5;[0-9]{3}mhello world\u{1b}\\[0m\n").unwrap();

    run.stdout(colored_line);
}
// Regression test for https://github.com/sharkdp/bat/issues/1922
//
// Color is disabled but decorations are forced on (line numbers, grid and
// header). Per the test name this is the "not loop-through" case, so the
// byte order mark at the start of the file must not appear in the output.
#[test]
fn bom_stripped_when_no_color_and_not_loop_through() {
bat()
.arg("--color=never")
.arg("--decorations=always")
.arg("--style=numbers,grid,header")
// Pin the width explicitly; presumably this keeps the decorated output
// independent of the terminal width detected on the test machine.
.arg("--terminal-width=80")
.arg("test_BOM.txt")
.assert()
.success()
.stdout(
// Exact expected output: "hello world" follows the line number with no
// U+FEFF in front of it.
// NOTE(review): the `grid` style is requested above, yet this literal shows
// no grid characters or column padding -- confirm the literal against the
// repository copy before relying on it.
"\
File: test_BOM.txt
1 hello world
",
);
}
#[test]

View file

@ -1,4 +1,4 @@
# PowerShell script for testing syntax highlighting
# PowerShell script for testing syntax highlighting
function Get-FutureTime {
 param (