Merge branch 'master' into 2783-setting-terminal-title

This commit is contained in:
Oliver Looney 2024-02-11 22:53:48 +00:00 committed by GitHub
commit c3f2ddf509
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
58 changed files with 1940 additions and 362 deletions

View file

@ -6,7 +6,9 @@
## Bugfixes
- Fix long file name wrapping in header, see #2835 (@FilipRazek)
- Fix `NO_COLOR` support, see #2767 (@acuteenvy)
- Fix handling of inputs with OSC ANSI escape sequences, see #2541 and #2544 (@eth-p)
## Other
@ -17,18 +19,27 @@
- Minor benchmark script improvements #2768 (@cyqsimon)
- Update Arch Linux package URL in README files #2779 (@brunobell)
- Update and improve `zsh` completion, see #2772 (@okapia)
- More extensible syntax mapping mechanism #2755 (@cyqsimon)
- Use proper Architecture for Debian packages built for musl, see #2811 (@Enselic)
- Pull in fix for unsafe-libyaml security advisory, see #2812 (@dtolnay)
- Update git-version dependency to use Syn v2, see #2816 (@dtolnay)
- Update git2 dependency to v0.18.2, see #2852 (@eth-p)
## Syntaxes
- `cmd-help`: scope subcommands followed by other terms, and other misc improvements, see #2819 (@victor-gp)
- Upgrade JQ syntax, see #2820 (@dependabot[bot])
## Themes
## `bat` as a library
- Changes to `syntax_mapping::SyntaxMapping` #2755 (@cyqsimon)
- `SyntaxMapping::get_syntax_for` is now correctly public
- [BREAKING] `SyntaxMapping::{empty,builtin}` are removed; use `SyntaxMapping::new` instead
- [BREAKING] `SyntaxMapping::mappings` is replaced by `SyntaxMapping::{builtin,custom,all}_mappings`
- Make `Controller::run_with_error_handler`'s error handler `FnMut`, see #2831 (@rhysd)
- Improve compile time by 20%, see #2815 (@dtolnay)
# v0.24.0

166
Cargo.lock generated
View file

@ -129,6 +129,8 @@ dependencies = [
"globset",
"grep-cli",
"home",
"indexmap 2.2.2",
"itertools",
"nix",
"nu-ansi-term",
"once_cell",
@ -140,12 +142,15 @@ dependencies = [
"run_script",
"semver",
"serde",
"serde_derive",
"serde_with",
"serde_yaml",
"serial_test",
"shell-words",
"syntect",
"tempfile",
"thiserror",
"toml",
"unicode-width",
"wait-timeout",
"walkdir",
@ -224,11 +229,12 @@ checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc"
[[package]]
name = "cc"
version = "1.0.73"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"jobserver",
"libc",
]
[[package]]
@ -267,13 +273,14 @@ checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
[[package]]
name = "clircle"
version = "0.4.0"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8e87cbed5354f17bd8ca8821a097fb62599787fe8f611743fad7ee156a0a600"
checksum = "ec0b92245ea62a7a751db4b0e4a583f8978e508077ef6de24fcc0d0dc5311a8d"
dependencies = [
"cfg-if",
"libc",
"serde",
"serde_derive",
"winapi",
]
@ -314,6 +321,41 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "darling"
version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5"
dependencies = [
"darling_core",
"quote",
"syn",
]
[[package]]
name = "dashmap"
version = "5.4.0"
@ -540,7 +582,7 @@ dependencies = [
"bstr",
"log",
"regex-automata",
"regex-syntax 0.8.2",
"regex-syntax",
]
[[package]]
@ -578,6 +620,12 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "0.3.0"
@ -600,12 +648,13 @@ dependencies = [
[[package]]
name = "indexmap"
version = "2.0.2"
version = "2.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897"
checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520"
dependencies = [
"equivalent",
"hashbrown 0.14.1",
"serde",
]
[[package]]
@ -646,9 +695,9 @@ checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
[[package]]
name = "libgit2-sys"
version = "0.16.1+1.7.1"
version = "0.16.2+1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2a2bb3680b094add03bb3732ec520ece34da31a8cd2d633d1389d0f0fb60d0c"
checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8"
dependencies = [
"cc",
"libc",
@ -980,7 +1029,7 @@ dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax 0.8.2",
"regex-syntax",
]
[[package]]
@ -991,15 +1040,9 @@ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.8.2",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
[[package]]
name = "regex-syntax"
version = "0.8.2"
@ -1066,9 +1109,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "semver"
version = "1.0.20"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090"
checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0"
[[package]]
name = "serde"
@ -1101,13 +1144,45 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_spanned"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1"
dependencies = [
"serde",
]
[[package]]
name = "serde_with"
version = "3.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15d167997bd841ec232f5b2b8e0e26606df2e7caa4c31b95ea9ca52b200bd270"
dependencies = [
"serde",
"serde_derive",
"serde_with_macros",
]
[[package]]
name = "serde_with_macros"
version = "3.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "865f9743393e638991566a8b7a479043c2c8da94a33e0a31f18214c9cae0a64d"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_yaml"
version = "0.9.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a15e0ef66bf939a7c890a0bf6d5a733c70202225f9888a89ed5c62298b019129"
dependencies = [
"indexmap 2.0.2",
"indexmap 2.2.2",
"itoa",
"ryu",
"serde",
@ -1180,9 +1255,9 @@ dependencies = [
[[package]]
name = "syntect"
version = "5.1.0"
version = "5.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e02b4b303bf8d08bfeb0445cba5068a3d306b6baece1d5582171a9bf49188f91"
checksum = "874dcfa363995604333cf947ae9f751ca3af4522c60886774c4963943b4746b1"
dependencies = [
"bincode",
"bitflags 1.3.2",
@ -1192,8 +1267,9 @@ dependencies = [
"once_cell",
"onig",
"plist",
"regex-syntax 0.7.5",
"regex-syntax",
"serde",
"serde_derive",
"serde_json",
"thiserror",
"walkdir",
@ -1294,6 +1370,41 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "toml"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6a4b9e8023eb94392d3dca65d717c53abc5dad49c07cb65bb8fcd87115fa325"
dependencies = [
"indexmap 2.2.2",
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.21.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1"
dependencies = [
"indexmap 2.2.2",
"serde",
"serde_spanned",
"toml_datetime",
"winnow",
]
[[package]]
name = "unicode-bidi"
version = "0.3.8"
@ -1613,6 +1724,15 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
[[package]]
name = "winnow"
version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "176b6138793677221d420fd2f0aeeced263f197688b36484660da767bca2fa32"
dependencies = [
"memchr",
]
[[package]]
name = "yaml-rust"
version = "0.4.5"

View file

@ -53,11 +53,12 @@ content_inspector = "0.2.4"
shell-words = { version = "1.1.0", optional = true }
unicode-width = "0.1.11"
globset = "0.4"
serde = { version = "1.0", features = ["derive"] }
serde = "1.0"
serde_derive = "1.0"
serde_yaml = "0.9.28"
semver = "1.0"
path_abs = { version = "0.5", default-features = false }
clircle = "0.4"
clircle = "0.5"
bugreport = { version = "0.5.0", optional = true }
etcetera = { version = "0.8.0", optional = true }
grep-cli = { version = "0.1.10", optional = true }
@ -74,7 +75,7 @@ optional = true
default-features = false
[dependencies.syntect]
version = "5.1.0"
version = "5.2.0"
default-features = false
features = ["parsing"]
@ -94,12 +95,22 @@ serial_test = { version = "2.0.0", default-features = false }
predicates = "3.0.4"
wait-timeout = "0.2.0"
tempfile = "3.8.1"
serde = { version = "1.0", features = ["derive"] }
[target.'cfg(unix)'.dev-dependencies]
nix = { version = "0.26.4", default-features = false, features = ["term"] }
[build-dependencies]
anyhow = "1.0.78"
indexmap = { version = "2.2.2", features = ["serde"] }
itertools = "0.11.0"
once_cell = "1.18"
regex = "1.10.2"
serde = "1.0"
serde_derive = "1.0"
serde_with = { version = "3.6.1", default-features = false, features = ["macros"] }
toml = { version = "0.8.9", features = ["preserve_order"] }
walkdir = "2.4"
[build-dependencies.clap]
version = "4.4.12"

View file

@ -602,7 +602,8 @@ set, `less` is used by default. If you want to use a different pager, you can ei
`PAGER` variable or set the `BAT_PAGER` environment variable to override what is specified in
`PAGER`.
**Note**: If `PAGER` is `more` or `most`, `bat` will silently use `less` instead to ensure support for colors.
>[!NOTE]
> If `PAGER` is `more` or `most`, `bat` will silently use `less` instead to ensure support for colors.
If you want to pass command-line arguments to the pager, you can also set them via the
`PAGER`/`BAT_PAGER` variables:
@ -613,20 +614,37 @@ export BAT_PAGER="less -RF"
Instead of using environment variables, you can also use `bat`s [configuration file](https://github.com/sharkdp/bat#configuration-file) to configure the pager (`--pager` option).
**Note**: By default, if the pager is set to `less` (and no command-line options are specified),
`bat` will pass the following command line options to the pager: `-R`/`--RAW-CONTROL-CHARS`,
`-F`/`--quit-if-one-screen` and `-X`/`--no-init`. The last option (`-X`) is only used for `less`
versions older than 530.
The `-R` option is needed to interpret ANSI colors correctly. The second option (`-F`) instructs
less to exit immediately if the output size is smaller than the vertical size of the terminal.
This is convenient for small files because you do not have to press `q` to quit the pager. The
third option (`-X`) is needed to fix a bug with the `--quit-if-one-screen` feature in old versions
of `less`. Unfortunately, it also breaks mouse-wheel support in `less`.
### Using `less` as a pager
If you want to enable mouse-wheel scrolling on older versions of `less`, you can pass just `-R` (as
in the example above, this will disable the quit-if-one-screen feature). For less 530 or newer,
it should work out of the box.
When using `less` as a pager, `bat` will automatically pass extra options along to `less`
to improve the experience. Specifically, `-R`/`--RAW-CONTROL-CHARS`, `-F`/`--quit-if-one-screen`,
and under certain conditions, `-X`/`--no-init` and/or `-S`/`--chop-long-lines`.
>[!IMPORTANT]
> These options will not be added if:
> - The pager is not named `less`.
> - The `--pager` argument contains any command-line arguments (e.g. `--pager="less -R"`).
> - The `BAT_PAGER` environment variable contains any command-line arguments (e.g. `export BAT_PAGER="less -R"`)
>
> The `--quit-if-one-screen` option will not be added when:
> - The `--paging=always` argument is used.
> - The `BAT_PAGING` environment is set to `always`.
The `-R` option is needed to interpret ANSI colors correctly.
The `-F` option instructs `less` to exit immediately if the output size is smaller than
the vertical size of the terminal. This is convenient for small files because you do not
have to press `q` to quit the pager.
The `-X` option is needed to fix a bug with the `--quit-if-one-screen` feature in versions
of `less` older than version 530. Unfortunately, it also breaks mouse-wheel support in `less`.
If you want to enable mouse-wheel scrolling on older versions of `less` and do not mind losing
the quit-if-one-screen feature, you can set the pager (via `--pager` or `BAT_PAGER`) to `less -R`.
For `less` 530 or newer, it should work out of the box.
The `-S` option is added when `bat`'s `-S`/`--chop-long-lines` option is used. This tells `less`
to truncate any lines larger than the terminal width.
### Indentation

@ -1 +1 @@
Subproject commit 687058289c1a888e0895378432d66b41609a84d8
Subproject commit b7e53e5d86814f04a48d2e441bcf5f9fdf07e9c1

@ -1 +1 @@
Subproject commit b150d84534dd060afdcaf3f58977faeaf5917e56
Subproject commit 209559b72f7e8848c988828088231b3a4d8b6838

@ -1 +1 @@
Subproject commit e627f1cb223c1171ab0a6a48d166c87aeae2a1d5
Subproject commit 86d4ee7a1f884851a1d21d66249687f527fced32

View file

@ -1,5 +1,6 @@
#[cfg(feature = "application")]
mod application;
mod syntax_mapping;
mod util;
fn main() -> anyhow::Result<()> {
@ -7,6 +8,8 @@ fn main() -> anyhow::Result<()> {
// see: https://doc.rust-lang.org/cargo/reference/build-scripts.html#rerun-if-changed
println!("cargo:rerun-if-changed=build/");
syntax_mapping::build_static_mappings()?;
#[cfg(feature = "application")]
application::gen_man_and_comp()?;

292
build/syntax_mapping.rs Normal file
View file

@ -0,0 +1,292 @@
use std::{
convert::Infallible,
env, fs,
path::{Path, PathBuf},
str::FromStr,
};
use anyhow::{anyhow, bail};
use indexmap::IndexMap;
use itertools::Itertools;
use once_cell::sync::Lazy;
use regex::Regex;
use serde_derive::Deserialize;
use serde_with::DeserializeFromStr;
use walkdir::WalkDir;
/// Known mapping targets.
///
/// Corresponds to `syntax_mapping::MappingTarget`.
#[allow(clippy::enum_variant_names)]
#[derive(Clone, Debug, Eq, PartialEq, Hash, DeserializeFromStr)]
pub enum MappingTarget {
MapTo(String),
MapToUnknown,
MapExtensionToUnknown,
}
impl FromStr for MappingTarget {
type Err = Infallible;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"MappingTarget::MapToUnknown" => Ok(Self::MapToUnknown),
"MappingTarget::MapExtensionToUnknown" => Ok(Self::MapExtensionToUnknown),
syntax => Ok(Self::MapTo(syntax.into())),
}
}
}
impl MappingTarget {
fn codegen(&self) -> String {
match self {
Self::MapTo(syntax) => format!(r###"MappingTarget::MapTo(r#"{syntax}"#)"###),
Self::MapToUnknown => "MappingTarget::MapToUnknown".into(),
Self::MapExtensionToUnknown => "MappingTarget::MapExtensionToUnknown".into(),
}
}
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, DeserializeFromStr)]
/// A single matcher.
///
/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`.
struct Matcher(Vec<MatcherSegment>);
/// Parse a matcher.
///
/// Note that this implementation is rather strict: it will greedily interpret
/// every valid environment variable replacement as such, then immediately
/// hard-error if it finds a '$', '{', or '}' anywhere in the remaining text
/// segments.
///
/// The reason for this strictness is I currently cannot think of a valid reason
/// why you would ever need '$', '{', or '}' as plaintext in a glob pattern.
/// Therefore any such occurrences are likely human errors.
///
/// If we later discover some edge cases, it's okay to make it more permissive.
impl FromStr for Matcher {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
use MatcherSegment as Seg;
static VAR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap());
let mut segments = vec![];
let mut text_start = 0;
for capture in VAR_REGEX.captures_iter(s) {
let match_0 = capture.get(0).unwrap();
// text before this var
let text_end = match_0.start();
segments.push(Seg::Text(s[text_start..text_end].into()));
text_start = match_0.end();
// this var
segments.push(Seg::Env(capture.get(1).unwrap().as_str().into()));
}
// possible trailing text
segments.push(Seg::Text(s[text_start..].into()));
// cleanup empty text segments
let non_empty_segments = segments
.into_iter()
.filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true))
.collect_vec();
// sanity check
if non_empty_segments
.windows(2)
.any(|segs| segs[0].is_text() && segs[1].is_text())
{
unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}");
}
// guard empty case
if non_empty_segments.is_empty() {
bail!(r#"Parsed an empty matcher: "{s}""#);
}
// guard variable syntax leftover fragments
if non_empty_segments
.iter()
.filter_map(Seg::text)
.any(|t| t.contains(['$', '{', '}']))
{
bail!(r#"Invalid matcher: "{s}""#);
}
Ok(Self(non_empty_segments))
}
}
impl Matcher {
fn codegen(&self) -> String {
match self.0.len() {
0 => unreachable!("0-length matcher should never be created"),
// if-let guard would be ideal here
// see: https://github.com/rust-lang/rust/issues/51114
1 if self.0[0].is_text() => {
let s = self.0[0].text().unwrap();
format!(r###"Lazy::new(|| Some(build_matcher_fixed(r#"{s}"#)))"###)
}
// parser logic ensures that this case can only happen when there are dynamic segments
_ => {
let segs = self.0.iter().map(MatcherSegment::codegen).join(", ");
format!(r###"Lazy::new(|| build_matcher_dynamic(&[{segs}]))"###)
}
}
}
}
/// A segment in a matcher.
///
/// Corresponds to `syntax_mapping::MatcherSegment`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum MatcherSegment {
Text(String),
Env(String),
}
#[allow(dead_code)]
impl MatcherSegment {
fn is_text(&self) -> bool {
matches!(self, Self::Text(_))
}
fn is_env(&self) -> bool {
matches!(self, Self::Env(_))
}
fn text(&self) -> Option<&str> {
match self {
Self::Text(t) => Some(t),
Self::Env(_) => None,
}
}
fn env(&self) -> Option<&str> {
match self {
Self::Text(_) => None,
Self::Env(t) => Some(t),
}
}
fn codegen(&self) -> String {
match self {
Self::Text(s) => format!(r###"MatcherSegment::Text(r#"{s}"#)"###),
Self::Env(s) => format!(r###"MatcherSegment::Env(r#"{s}"#)"###),
}
}
}
/// A struct that models a single .toml file in /src/syntax_mapping/builtins/.
#[derive(Clone, Debug, Deserialize)]
struct MappingDefModel {
mappings: IndexMap<MappingTarget, Vec<Matcher>>,
}
impl MappingDefModel {
fn into_mapping_list(self) -> MappingList {
let list = self
.mappings
.into_iter()
.flat_map(|(target, matchers)| {
matchers
.into_iter()
.map(|matcher| (matcher, target.clone()))
.collect::<Vec<_>>()
})
.collect();
MappingList(list)
}
}
#[derive(Clone, Debug)]
struct MappingList(Vec<(Matcher, MappingTarget)>);
impl MappingList {
fn codegen(&self) -> String {
let array_items: Vec<_> = self
.0
.iter()
.map(|(matcher, target)| {
format!("({m}, {t})", m = matcher.codegen(), t = target.codegen())
})
.collect();
let len = array_items.len();
format!(
"/// Generated by build script from /src/syntax_mapping/builtins/.\n\
pub(crate) static BUILTIN_MAPPINGS: [(Lazy<Option<GlobMatcher>>, MappingTarget); {len}] = [\n{items}\n];",
items = array_items.join(",\n")
)
}
}
/// Get the list of paths to all mapping definition files that should be
/// included for the current target platform.
fn get_def_paths() -> anyhow::Result<Vec<PathBuf>> {
let source_subdirs = [
"common",
#[cfg(target_family = "unix")]
"unix-family",
#[cfg(any(
target_os = "freebsd",
target_os = "netbsd",
target_os = "openbsd",
target_os = "macos"
))]
"bsd-family",
#[cfg(target_os = "linux")]
"linux",
#[cfg(target_os = "macos")]
"macos",
#[cfg(target_os = "windows")]
"windows",
];
let mut toml_paths = vec![];
for subdir in source_subdirs {
let wd = WalkDir::new(Path::new("src/syntax_mapping/builtins").join(subdir));
let paths = wd
.into_iter()
.filter_map_ok(|entry| {
let path = entry.path();
(path.is_file() && path.extension().map(|ext| ext == "toml").unwrap_or(false))
.then(|| path.to_owned())
})
.collect::<Result<Vec<_>, _>>()?;
toml_paths.extend(paths);
}
toml_paths.sort_by_key(|path| {
path.file_name()
.expect("file name should not terminate in ..")
.to_owned()
});
Ok(toml_paths)
}
fn read_all_mappings() -> anyhow::Result<MappingList> {
let mut all_mappings = vec![];
for path in get_def_paths()? {
let toml_string = fs::read_to_string(path)?;
let mappings = toml::from_str::<MappingDefModel>(&toml_string)?.into_mapping_list();
all_mappings.extend(mappings.0);
}
let duplicates = all_mappings
.iter()
.duplicates_by(|(matcher, _)| matcher)
.collect_vec();
if !duplicates.is_empty() {
bail!("Rules with duplicate matchers found: {duplicates:?}");
}
Ok(MappingList(all_mappings))
}
/// Build the static syntax mappings defined in /src/syntax_mapping/builtins/
/// into a .rs source file, which is to be inserted with `include!`.
pub fn build_static_mappings() -> anyhow::Result<()> {
println!("cargo:rerun-if-changed=src/syntax_mapping/builtins/");
let mappings = read_all_mappings()?;
let codegen_path = Path::new(&env::var_os("OUT_DIR").ok_or(anyhow!("OUT_DIR is unset"))?)
.join("codegen_static_syntax_mappings.rs");
fs::write(codegen_path, mappings.codegen())?;
Ok(())
}

View file

@ -441,7 +441,7 @@ mod tests {
fn new() -> Self {
SyntaxDetectionTest {
assets: HighlightingAssets::from_binary(),
syntax_mapping: SyntaxMapping::builtin(),
syntax_mapping: SyntaxMapping::new(),
temp_dir: TempDir::new().expect("creation of temporary directory"),
}
}

View file

@ -3,7 +3,7 @@ use std::path::Path;
use std::time::SystemTime;
use semver::Version;
use serde::{Deserialize, Serialize};
use serde_derive::{Deserialize, Serialize};
use crate::error::*;

View file

@ -3,8 +3,7 @@ use super::*;
use std::collections::BTreeMap;
use std::convert::TryFrom;
use serde::Deserialize;
use serde::Serialize;
use serde_derive::{Deserialize, Serialize};
use once_cell::unsync::OnceCell;

View file

@ -121,7 +121,7 @@ impl App {
_ => unreachable!("other values for --paging are not allowed"),
};
let mut syntax_mapping = SyntaxMapping::builtin();
let mut syntax_mapping = SyntaxMapping::new();
if let Some(values) = self.matches.get_many::<String>("ignored-suffix") {
for suffix in values {
@ -130,7 +130,9 @@ impl App {
}
if let Some(values) = self.matches.get_many::<String>("map-syntax") {
for from_to in values {
// later args take precedence over earlier ones, hence `.rev()`
// see: https://github.com/sharkdp/bat/pull/2755#discussion_r1456416875
for from_to in values.rev() {
let parts: Vec<_> = from_to.split(':').collect();
if parts.len() != 2 {

View file

@ -78,9 +78,11 @@ fn run_cache_subcommand(
Ok(())
}
fn get_syntax_mapping_to_paths<'a>(
mappings: &[(GlobMatcher, MappingTarget<'a>)],
) -> HashMap<&'a str, Vec<String>> {
fn get_syntax_mapping_to_paths<'r, 't, I>(mappings: I) -> HashMap<&'t str, Vec<String>>
where
I: IntoIterator<Item = (&'r GlobMatcher, &'r MappingTarget<'t>)>,
't: 'r, // target text outlives rule
{
let mut map = HashMap::new();
for mapping in mappings {
if let (matcher, MappingTarget::MapTo(s)) = mapping {
@ -123,7 +125,7 @@ pub fn get_languages(config: &Config, cache_dir: &Path) -> Result<String> {
languages.sort_by_key(|lang| lang.name.to_uppercase());
let configured_languages = get_syntax_mapping_to_paths(config.syntax_mapping.mappings());
let configured_languages = get_syntax_mapping_to_paths(config.syntax_mapping.all_mappings());
for lang in &mut languages {
if let Some(additional_paths) = configured_languages.get(lang.name.as_str()) {

View file

@ -47,7 +47,7 @@ impl<'b> Controller<'b> {
&self,
inputs: Vec<Input>,
output_buffer: Option<&mut dyn std::fmt::Write>,
handle_error: impl Fn(&Error, &mut dyn Write),
mut handle_error: impl FnMut(&Error, &mut dyn Write),
) -> Result<bool> {
let mut output_type;

View file

@ -7,8 +7,6 @@ use nu_ansi_term::Style;
use bytesize::ByteSize;
use console::AnsiCodeIterator;
use syntect::easy::HighlightLines;
use syntect::highlighting::Color;
use syntect::highlighting::Theme;
@ -33,9 +31,23 @@ use crate::line_range::RangeCheckResult;
use crate::preprocessor::{expand_tabs, replace_nonprintable};
use crate::style::StyleComponent;
use crate::terminal::{as_terminal_escaped, to_ansi_color};
use crate::vscreen::AnsiStyle;
use crate::vscreen::{AnsiStyle, EscapeSequence, EscapeSequenceIterator};
use crate::wrapping::WrappingMode;
const ANSI_UNDERLINE_ENABLE: EscapeSequence = EscapeSequence::CSI {
raw_sequence: "\x1B[4m",
parameters: "4",
intermediates: "",
final_byte: "m",
};
const ANSI_UNDERLINE_DISABLE: EscapeSequence = EscapeSequence::CSI {
raw_sequence: "\x1B[24m",
parameters: "24",
intermediates: "",
final_byte: "m",
};
pub enum OutputHandle<'a> {
IoWrite(&'a mut dyn io::Write),
FmtWrite(&'a mut dyn fmt::Write),
@ -287,6 +299,14 @@ impl<'a> InteractivePrinter<'a> {
}
}
fn get_header_component_indent_length(&self) -> usize {
if self.config.style_components.grid() && self.panel_width > 0 {
self.panel_width + 2
} else {
self.panel_width
}
}
fn print_header_component_indent(&mut self, handle: &mut OutputHandle) -> Result<()> {
if self.config.style_components.grid() {
write!(
@ -302,6 +322,30 @@ impl<'a> InteractivePrinter<'a> {
}
}
fn print_header_component_with_indent(
&mut self,
handle: &mut OutputHandle,
content: &str,
) -> Result<()> {
self.print_header_component_indent(handle)?;
writeln!(handle, "{}", content)
}
fn print_header_multiline_component(
&mut self,
handle: &mut OutputHandle,
content: &str,
) -> Result<()> {
let mut content = content;
let content_width = self.config.term_width - self.get_header_component_indent_length();
while content.len() > content_width {
let (content_line, remaining) = content.split_at(content_width);
self.print_header_component_with_indent(handle, content_line)?;
content = remaining;
}
self.print_header_component_with_indent(handle, content)
}
fn preprocess(&self, text: &str, cursor: &mut usize) -> String {
if self.config.tab_width > 0 {
return expand_tabs(text, self.config.tab_width, cursor);
@ -377,12 +421,11 @@ impl<'a> Printer for InteractivePrinter<'a> {
}
}
header_components.iter().try_for_each(|component| {
self.print_header_component_indent(handle)?;
match component {
StyleComponent::HeaderFilename => writeln!(
handle,
header_components
.iter()
.try_for_each(|component| match component {
StyleComponent::HeaderFilename => {
let header_filename = format!(
"{}{}{}",
description
.kind()
@ -390,17 +433,19 @@ impl<'a> Printer for InteractivePrinter<'a> {
.unwrap_or_else(|| "".into()),
self.colors.header_value.paint(description.title()),
mode
),
);
self.print_header_multiline_component(handle, &header_filename)
}
StyleComponent::HeaderFilesize => {
let bsize = metadata
.size
.map(|s| format!("{}", ByteSize(s)))
.unwrap_or_else(|| "-".into());
writeln!(handle, "Size: {}", self.colors.header_value.paint(bsize))
let header_filesize =
format!("Size: {}", self.colors.header_value.paint(bsize));
self.print_header_multiline_component(handle, &header_filesize)
}
_ => Ok(()),
}
})?;
if self.config.style_components.grid() {
@ -521,7 +566,7 @@ impl<'a> Printer for InteractivePrinter<'a> {
self.config.highlighted_lines.0.check(line_number) == RangeCheckResult::InRange;
if highlight_this_line && self.config.theme == "ansi" {
self.ansi_style.update("^[4m");
self.ansi_style.update(ANSI_UNDERLINE_ENABLE);
}
let background_color = self
@ -548,23 +593,17 @@ impl<'a> Printer for InteractivePrinter<'a> {
let italics = self.config.use_italic_text;
for &(style, region) in &regions {
let ansi_iterator = AnsiCodeIterator::new(region);
let ansi_iterator = EscapeSequenceIterator::new(region);
for chunk in ansi_iterator {
match chunk {
// ANSI escape passthrough.
(ansi, true) => {
self.ansi_style.update(ansi);
write!(handle, "{}", ansi)?;
}
// Regular text.
(text, false) => {
let text = &*self.preprocess(text, &mut cursor_total);
EscapeSequence::Text(text) => {
let text = self.preprocess(text, &mut cursor_total);
let text_trimmed = text.trim_end_matches(|c| c == '\r' || c == '\n');
write!(
handle,
"{}",
"{}{}",
as_terminal_escaped(
style,
&format!("{}{}", self.ansi_style, text_trimmed),
@ -572,9 +611,11 @@ impl<'a> Printer for InteractivePrinter<'a> {
colored_output,
italics,
background_color
)
),
self.ansi_style.to_reset_sequence(),
)?;
// Pad the rest of the line.
if text.len() != text_trimmed.len() {
if let Some(background_color) = background_color {
let ansi_style = Style {
@ -592,6 +633,12 @@ impl<'a> Printer for InteractivePrinter<'a> {
write!(handle, "{}", &text[text_trimmed.len()..])?;
}
}
// ANSI escape passthrough.
_ => {
write!(handle, "{}", chunk.raw())?;
self.ansi_style.update(chunk);
}
}
}
}
@ -601,17 +648,11 @@ impl<'a> Printer for InteractivePrinter<'a> {
}
} else {
for &(style, region) in &regions {
let ansi_iterator = AnsiCodeIterator::new(region);
let ansi_iterator = EscapeSequenceIterator::new(region);
for chunk in ansi_iterator {
match chunk {
// ANSI escape passthrough.
(ansi, true) => {
self.ansi_style.update(ansi);
write!(handle, "{}", ansi)?;
}
// Regular text.
(text, false) => {
EscapeSequence::Text(text) => {
let text = self.preprocess(
text.trim_end_matches(|c| c == '\r' || c == '\n'),
&mut cursor_total,
@ -654,7 +695,7 @@ impl<'a> Printer for InteractivePrinter<'a> {
// It wraps.
write!(
handle,
"{}\n{}",
"{}{}\n{}",
as_terminal_escaped(
style,
&format!("{}{}", self.ansi_style, line_buf),
@ -663,6 +704,7 @@ impl<'a> Printer for InteractivePrinter<'a> {
self.config.use_italic_text,
background_color
),
self.ansi_style.to_reset_sequence(),
panel_wrap.clone().unwrap()
)?;
@ -691,6 +733,12 @@ impl<'a> Printer for InteractivePrinter<'a> {
)
)?;
}
// ANSI escape passthrough.
_ => {
write!(handle, "{}", chunk.raw())?;
self.ansi_style.update(chunk);
}
}
}
}
@ -711,8 +759,8 @@ impl<'a> Printer for InteractivePrinter<'a> {
}
if highlight_this_line && self.config.theme == "ansi" {
self.ansi_style.update("^[24m");
write!(handle, "\x1B[24m")?;
write!(handle, "{}", ANSI_UNDERLINE_DISABLE.raw())?;
self.ansi_style.update(ANSI_UNDERLINE_DISABLE);
}
Ok(())

View file

@ -1,12 +1,23 @@
use std::path::Path;
use crate::error::Result;
use ignored_suffixes::IgnoredSuffixes;
use globset::{Candidate, GlobBuilder, GlobMatcher};
use crate::error::Result;
use builtin::BUILTIN_MAPPINGS;
use ignored_suffixes::IgnoredSuffixes;
mod builtin;
pub mod ignored_suffixes;
fn make_glob_matcher(from: &str) -> Result<GlobMatcher> {
let matcher = GlobBuilder::new(from)
.case_insensitive(true)
.literal_separator(true)
.build()?
.compile_matcher();
Ok(matcher)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum MappingTarget<'a> {
@ -29,204 +40,72 @@ pub enum MappingTarget<'a> {
#[derive(Debug, Clone, Default)]
pub struct SyntaxMapping<'a> {
mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
/// User-defined mappings at run time.
///
/// Rules in front have precedence.
custom_mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
pub(crate) ignored_suffixes: IgnoredSuffixes<'a>,
}
impl<'a> SyntaxMapping<'a> {
pub fn empty() -> SyntaxMapping<'a> {
pub fn new() -> SyntaxMapping<'a> {
Default::default()
}
pub fn builtin() -> SyntaxMapping<'a> {
let mut mapping = Self::empty();
mapping.insert("*.h", MappingTarget::MapTo("C++")).unwrap();
mapping
.insert(".clang-format", MappingTarget::MapTo("YAML"))
.unwrap();
mapping.insert("*.fs", MappingTarget::MapTo("F#")).unwrap();
mapping
.insert("build", MappingTarget::MapToUnknown)
.unwrap();
mapping
.insert("**/.ssh/config", MappingTarget::MapTo("SSH Config"))
.unwrap();
mapping
.insert(
"**/bat/config",
MappingTarget::MapTo("Bourne Again Shell (bash)"),
)
.unwrap();
mapping
.insert(
"/etc/profile",
MappingTarget::MapTo("Bourne Again Shell (bash)"),
)
.unwrap();
mapping
.insert(
"os-release",
MappingTarget::MapTo("Bourne Again Shell (bash)"),
)
.unwrap();
mapping
.insert("*.pac", MappingTarget::MapTo("JavaScript (Babel)"))
.unwrap();
mapping
.insert("fish_history", MappingTarget::MapTo("YAML"))
.unwrap();
for glob in ["*.jsonl", "*.sarif"] {
mapping.insert(glob, MappingTarget::MapTo("JSON")).unwrap();
}
// See #2151, https://nmap.org/book/nse-language.html
mapping
.insert("*.nse", MappingTarget::MapTo("Lua"))
.unwrap();
// See #1008
mapping
.insert("rails", MappingTarget::MapToUnknown)
.unwrap();
mapping
.insert("Containerfile", MappingTarget::MapTo("Dockerfile"))
.unwrap();
mapping
.insert("*.ksh", MappingTarget::MapTo("Bourne Again Shell (bash)"))
.unwrap();
// Nginx and Apache syntax files both want to style all ".conf" files
// see #1131 and #1137
mapping
.insert("*.conf", MappingTarget::MapExtensionToUnknown)
.unwrap();
for glob in &[
"/etc/nginx/**/*.conf",
"/etc/nginx/sites-*/**/*",
"nginx.conf",
"mime.types",
] {
mapping.insert(glob, MappingTarget::MapTo("nginx")).unwrap();
}
for glob in &[
"/etc/apache2/**/*.conf",
"/etc/apache2/sites-*/**/*",
"httpd.conf",
] {
mapping
.insert(glob, MappingTarget::MapTo("Apache Conf"))
.unwrap();
}
for glob in &[
"**/systemd/**/*.conf",
"**/systemd/**/*.example",
"*.automount",
"*.device",
"*.dnssd",
"*.link",
"*.mount",
"*.netdev",
"*.network",
"*.nspawn",
"*.path",
"*.service",
"*.scope",
"*.slice",
"*.socket",
"*.swap",
"*.target",
"*.timer",
] {
mapping.insert(glob, MappingTarget::MapTo("INI")).unwrap();
}
// unix mail spool
for glob in &["/var/spool/mail/*", "/var/mail/*"] {
mapping.insert(glob, MappingTarget::MapTo("Email")).unwrap()
}
// pacman hooks
mapping
.insert("*.hook", MappingTarget::MapTo("INI"))
.unwrap();
mapping
.insert("*.ron", MappingTarget::MapTo("Rust"))
.unwrap();
// Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/`
// See e.g. https://git-scm.com/docs/git-config#FILES
match (
std::env::var_os("XDG_CONFIG_HOME").filter(|val| !val.is_empty()),
std::env::var_os("HOME")
.filter(|val| !val.is_empty())
.map(|home| Path::new(&home).join(".config")),
) {
(Some(xdg_config_home), Some(default_config_home))
if xdg_config_home == default_config_home => {
insert_git_config_global(&mut mapping, &xdg_config_home)
}
(Some(xdg_config_home), Some(default_config_home)) /* else guard */ => {
insert_git_config_global(&mut mapping, &xdg_config_home);
insert_git_config_global(&mut mapping, &default_config_home)
}
(Some(config_home), None) => insert_git_config_global(&mut mapping, &config_home),
(None, Some(config_home)) => insert_git_config_global(&mut mapping, &config_home),
(None, None) => (),
};
fn insert_git_config_global(mapping: &mut SyntaxMapping, config_home: impl AsRef<Path>) {
let git_config_path = config_home.as_ref().join("git");
mapping
.insert(
&git_config_path.join("config").to_string_lossy(),
MappingTarget::MapTo("Git Config"),
)
.ok();
mapping
.insert(
&git_config_path.join("ignore").to_string_lossy(),
MappingTarget::MapTo("Git Ignore"),
)
.ok();
mapping
.insert(
&git_config_path.join("attributes").to_string_lossy(),
MappingTarget::MapTo("Git Attributes"),
)
.ok();
}
mapping
}
pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
let glob = GlobBuilder::new(from)
.case_insensitive(true)
.literal_separator(true)
.build()?;
self.mappings.push((glob.compile_matcher(), to));
let matcher = make_glob_matcher(from)?;
self.custom_mappings.push((matcher, to));
Ok(())
}
pub fn mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] {
&self.mappings
/// Returns an iterator over all mappings. User-defined mappings are listed
/// before builtin mappings; mappings in front have higher precedence.
///
/// Builtin mappings' `GlobMatcher`s are lazily compiled.
///
/// Note that this function only returns mappings that are valid under the
/// current environment. For details see [`Self::builtin_mappings`].
pub fn all_mappings(&self) -> impl Iterator<Item = (&GlobMatcher, &MappingTarget<'a>)> {
self.custom_mappings()
.iter()
.map(|(matcher, target)| (matcher, target)) // as_ref
.chain(
// we need a map with a closure to "do" the lifetime variance
// see: https://discord.com/channels/273534239310479360/1120124565591425034/1170543402870382653
// also, clippy false positive:
// see: https://github.com/rust-lang/rust-clippy/issues/9280
#[allow(clippy::map_identity)]
self.builtin_mappings().map(|rule| rule),
)
}
pub(crate) fn get_syntax_for(&self, path: impl AsRef<Path>) -> Option<MappingTarget<'a>> {
/// Returns an iterator over all valid builtin mappings. Mappings in front
/// have higher precedence.
///
/// The `GlabMatcher`s are lazily compiled.
///
/// Mappings that are invalid under the current environment (i.e. rule
/// requires environment variable(s) that is unset, or the joined string
/// after variable(s) replacement is not a valid glob expression) are
/// ignored.
pub fn builtin_mappings(
&self,
) -> impl Iterator<Item = (&'static GlobMatcher, &'static MappingTarget<'static>)> {
BUILTIN_MAPPINGS
.iter()
.filter_map(|(matcher, target)| matcher.as_ref().map(|glob| (glob, target)))
}
/// Returns all user-defined mappings.
pub fn custom_mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] {
&self.custom_mappings
}
pub fn get_syntax_for(&self, path: impl AsRef<Path>) -> Option<MappingTarget<'a>> {
// Try matching on the file name as-is.
let candidate = Candidate::new(&path);
let candidate_filename = path.as_ref().file_name().map(Candidate::new);
for (ref glob, ref syntax) in self.mappings.iter().rev() {
for (glob, syntax) in self.all_mappings() {
if glob.is_match_candidate(&candidate)
|| candidate_filename
.as_ref()
@ -252,9 +131,46 @@ impl<'a> SyntaxMapping<'a> {
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn basic() {
let mut map = SyntaxMapping::empty();
fn builtin_mappings_work() {
let map = SyntaxMapping::new();
assert_eq!(
map.get_syntax_for("/path/to/build"),
Some(MappingTarget::MapToUnknown)
);
}
#[test]
fn all_fixed_builtin_mappings_can_compile() {
let map = SyntaxMapping::new();
// collect call evaluates all lazy closures
// fixed builtin mappings will panic if they fail to compile
let _mappings = map.builtin_mappings().collect::<Vec<_>>();
}
#[test]
fn builtin_mappings_matcher_only_compile_once() {
let map = SyntaxMapping::new();
let two_iterations: Vec<_> = (0..2)
.map(|_| {
// addresses of every matcher
map.builtin_mappings()
.map(|(matcher, _)| matcher as *const _ as usize)
.collect::<Vec<_>>()
})
.collect();
// if the matchers are only compiled once, their address should remain the same
assert_eq!(two_iterations[0], two_iterations[1]);
}
#[test]
fn custom_mappings_work() {
let mut map = SyntaxMapping::new();
map.insert("/path/to/Cargo.lock", MappingTarget::MapTo("TOML"))
.ok();
map.insert("/path/to/.ignore", MappingTarget::MapTo("Git Ignore"))
@ -273,52 +189,32 @@ mod tests {
}
#[test]
fn user_can_override_builtin_mappings() {
let mut map = SyntaxMapping::builtin();
fn custom_mappings_override_builtin() {
let mut map = SyntaxMapping::new();
assert_eq!(
map.get_syntax_for("/etc/profile"),
Some(MappingTarget::MapTo("Bourne Again Shell (bash)"))
map.get_syntax_for("/path/to/httpd.conf"),
Some(MappingTarget::MapTo("Apache Conf"))
);
map.insert("/etc/profile", MappingTarget::MapTo("My Syntax"))
map.insert("httpd.conf", MappingTarget::MapTo("My Syntax"))
.ok();
assert_eq!(
map.get_syntax_for("/etc/profile"),
map.get_syntax_for("/path/to/httpd.conf"),
Some(MappingTarget::MapTo("My Syntax"))
);
}
#[test]
fn builtin_mappings() {
let map = SyntaxMapping::builtin();
fn custom_mappings_precedence() {
let mut map = SyntaxMapping::new();
map.insert("/path/to/foo", MappingTarget::MapTo("alpha"))
.ok();
map.insert("/path/to/foo", MappingTarget::MapTo("bravo"))
.ok();
assert_eq!(
map.get_syntax_for("/path/to/build"),
Some(MappingTarget::MapToUnknown)
map.get_syntax_for("/path/to/foo"),
Some(MappingTarget::MapTo("alpha"))
);
}
#[test]
/// verifies that SyntaxMapping::builtin() doesn't repeat `Glob`-based keys
fn no_duplicate_builtin_keys() {
let mappings = SyntaxMapping::builtin().mappings;
for i in 0..mappings.len() {
let tail = mappings[i + 1..].into_iter();
let (dupl, _): (Vec<_>, Vec<_>) =
tail.partition(|item| item.0.glob() == mappings[i].0.glob());
// emit repeats on failure
assert_eq!(
dupl.len(),
0,
"Glob pattern `{}` mapped to multiple: {:?}",
mappings[i].0.glob().glob(),
{
let (_, mut dupl_targets): (Vec<GlobMatcher>, Vec<MappingTarget>) =
dupl.into_iter().cloned().unzip();
dupl_targets.push(mappings[i].1)
},
)
}
}
}

View file

@ -0,0 +1,91 @@
use std::env;
use globset::GlobMatcher;
use once_cell::sync::Lazy;
use crate::syntax_mapping::{make_glob_matcher, MappingTarget};
// Static syntax mappings generated from /src/syntax_mapping/builtins/ by the
// build script (/build/syntax_mapping.rs).
include!(concat!(
env!("OUT_DIR"),
"/codegen_static_syntax_mappings.rs"
));
// The defined matcher strings are analysed at compile time and converted into
// lazily-compiled `GlobMatcher`s. This is so that the string searches are moved
// from run time to compile time, thus improving startup performance.
//
// To any future maintainer (including possibly myself) wondering why there is
// not a `BuiltinMatcher` enum that looks like this:
//
// ```
// enum BuiltinMatcher {
// Fixed(&'static str),
// Dynamic(Lazy<Option<String>>),
// }
// ```
//
// Because there was. I tried it and threw it out.
//
// Naively looking at the problem from a distance, this may seem like a good
// design (strongly typed etc. etc.). It would also save on compiled size by
// extracting out common behaviour into functions. But while actually
// implementing the lazy matcher compilation logic, I realised that it's most
// convenient for `BUILTIN_MAPPINGS` to have the following type:
//
// `[(Lazy<Option<GlobMatcher>>, MappingTarget); N]`
//
// The benefit for this is that operations like listing all builtin mappings
// would be effectively memoised. The caller would not have to compile another
// `GlobMatcher` for rules that they have previously visited.
//
// Unfortunately, this means we are going to have to store a distinct closure
// for each rule anyway, which makes a `BuiltinMatcher` enum a pointless layer
// of indirection.
//
// In the current implementation, the closure within each generated rule simply
// calls either `build_matcher_fixed` or `build_matcher_dynamic`, depending on
// whether the defined matcher contains dynamic segments or not.
/// Compile a fixed glob string into a glob matcher.
///
/// A failure to compile is a fatal error.
///
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
fn build_matcher_fixed(from: &str) -> GlobMatcher {
make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile")
}
/// Join a list of matcher segments to create a glob string, replacing all
/// environment variables, then compile to a glob matcher.
///
/// Returns `None` if any replacement fails, or if the joined glob string fails
/// to compile.
///
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
// join segments
let mut buf = String::new();
for seg in segs {
match seg {
MatcherSegment::Text(s) => buf.push_str(s),
MatcherSegment::Env(var) => {
let replaced = env::var(var).ok()?;
buf.push_str(&replaced);
}
}
}
// compile glob matcher
let matcher = make_glob_matcher(&buf).ok()?;
Some(matcher)
}
/// A segment of a dynamic builtin matcher.
///
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
#[derive(Clone, Debug)]
enum MatcherSegment {
Text(&'static str),
Env(&'static str),
}

View file

@ -0,0 +1,116 @@
# `/src/syntax_mapping/builtins`
The files in this directory define path/name-based syntax mappings, which amend
and take precedence over the extension/content-based syntax mappings provided by
[syntect](https://github.com/trishume/syntect).
## File organisation
Each TOML file should describe the syntax mappings of a single application, or
otherwise a set of logically-related rules.
What defines "a single application" here is deliberately vague, since the
file-splitting is purely for maintainability reasons. (Technically, we could
just as well use a single TOML file.) So just use common sense.
TOML files should reside in the corresponding subdirectory of the platform(s)
that they intend to target. At compile time, the build script will go through
each subdirectory that is applicable to the compilation target, collect the
syntax mappings defined by all TOML files, and embed them into the binary.
## File syntax
Each TOML file should contain a single section named `mappings`, with each of
its keys being a language identifier (first column of `bat -L`; also referred to
as "target").
The value of each key should be an array of strings, with each item being a glob
matcher. We will call each of these items a "rule".
For example, if `foo-application` uses both TOML and YAML configuration files,
we could write something like this:
```toml
# 30-foo-application.toml
[mappings]
"TOML" = [
# rules for TOML syntax go here
"/usr/share/foo-application/toml-config/*.conf",
"/etc/foo-application/toml-config/*.conf",
]
"YAML" = [
# rules for YAML syntax go here
# ...
]
```
### Dynamic environment variable replacement
In additional to the standard glob matcher syntax, rules also support dynamic
replacement of environment variables at runtime. This allows us to concisely
handle things like [XDG](https://specifications.freedesktop.org/basedir-spec/latest/).
All environment variables intended to be replaced at runtime must be enclosed in
`${}`, for example `"/foo/*/${YOUR_ENV}-suffix/*.log"`. Note that this is the
**only** admissible syntax; other variable substitution syntaxes are not
supported and will either cause a compile time error, or be treated as plain
text.
For example, if `foo-application` also supports per-user configuration files, we
could write something like this:
```toml
# 30-foo-application.toml
[mappings]
"TOML" = [
# rules for TOML syntax go here
"/usr/share/foo-application/toml-config/*.conf",
"/etc/foo-application/toml-config/*.conf",
"${XDG_CONFIG_HOME}/foo-application/toml-config/*.conf",
"${HOME}/.config/foo-application/toml-config/*.conf",
]
"YAML" = [
# rules for YAML syntax go here
# ...
]
```
If any environment variable replacement in a rule fails (for example when a
variable is unset), or if the glob string after replacements is invalid, the
entire rule will be ignored.
### Explicitly mapping to unknown
Sometimes it may be necessary to "unset" a particular syntect mapping - perhaps
a syntax's matching rules are "too greedy", and is claiming files that it should
not. In this case, there are two special identifiers:
`MappingTarget::MapToUnknown` and `MappingTarget::MapExtensionToUnknown`
(corresponding to the two variants of the `syntax_mapping::MappingTarget` enum).
An example of this would be `*.conf` files in general. So we may write something
like this:
```toml
# 99-unset-ambiguous-extensions.toml
[mappings]
"MappingTarget::MapExtensionToUnknown" = [
"*.conf",
]
```
## Ordering
At compile time, all TOML files applicable to the target are processed in
lexicographical filename order. So `00-foo.toml` takes precedence over
`10-bar.toml`, which takes precedence over `20-baz.toml`, and so on. Note that
**only** the filenames of the TOML files are taken into account; the
subdirectories they are placed in have no influence on ordering.
This behaviour can be occasionally useful for creating high/low priority rules,
such as in the aforementioned example of explicitly mapping `*.conf` files to
unknown. Generally this should not be much of a concern though, since rules
should be written as specifically as possible for each application.
Rules within each TOML file are processed (and therefore matched) in the order
in which they are defined. At runtime, the syntax selection algorithm will
short-circuit and return the target of the first matching rule.

View file

@ -0,0 +1,2 @@
[mappings]
"Bourne Again Shell (bash)" = ["/etc/os-release", "/var/run/os-release"]

View file

@ -0,0 +1,2 @@
[mappings]
"Apache Conf" = ["httpd.conf"]

View file

@ -0,0 +1,2 @@
[mappings]
"Bourne Again Shell (bash)" = ["**/bat/config"]

View file

@ -0,0 +1,2 @@
[mappings]
"Dockerfile" = ["Containerfile"]

View file

@ -0,0 +1,6 @@
[mappings]
"C++" = [
# probably better than the default Objective C mapping #877
"*.h",
]
"YAML" = [".clang-format"]

View file

@ -0,0 +1,2 @@
[mappings]
"F#" = ["*.fs"]

View file

@ -0,0 +1,10 @@
# Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/`
# See e.g. https://git-scm.com/docs/git-config#FILES
[mappings]
"Git Config" = ["${XDG_CONFIG_HOME}/git/config", "${HOME}/.config/git/config"]
"Git Ignore" = ["${XDG_CONFIG_HOME}/git/ignore", "${HOME}/.config/git/ignore"]
"Git Attributes" = [
"${XDG_CONFIG_HOME}/git/attributes",
"${HOME}/.config/git/attributes",
]

View file

@ -0,0 +1,3 @@
# JSON Lines is a simple variation of JSON #2535
[mappings]
"JSON" = ["*.jsonl"]

View file

@ -0,0 +1,2 @@
[mappings]
"nginx" = ["nginx.conf", "mime.types"]

View file

@ -0,0 +1,3 @@
[mappings]
# See #2151, https://nmap.org/book/nse-language.html
"Lua" = ["*.nse"]

View file

@ -0,0 +1,3 @@
# 1515
[mappings]
"JavaScript (Babel)" = ["*.pac"]

View file

@ -0,0 +1,3 @@
# Rusty Object Notation #2427
[mappings]
"Rust" = ["*.ron"]

View file

@ -0,0 +1,3 @@
# SARIF is a format for reporting static analysis results #2695
[mappings]
"JSON" = ["*.sarif"]

View file

@ -0,0 +1,2 @@
[mappings]
"SSH Config" = ["**/.ssh/config"]

View file

@ -0,0 +1,5 @@
[mappings]
"MappingTarget::MapExtensionToUnknown" = [
# common extension used for all kinds of formats
"*.conf",
]

View file

@ -0,0 +1,7 @@
[mappings]
"MappingTarget::MapToUnknown" = [
# "NAnt Build File" should only match *.build files, not files named "build"
"build",
# "bin/rails" scripts in a Ruby project misidentified as HTML (Rails) #1008
"rails",
]

View file

@ -0,0 +1,7 @@
[mappings]
"Bourne Again Shell (bash)" = [
"/etc/os-release",
"/usr/lib/os-release",
"/etc/initrd-release",
"/usr/lib/extension-release.d/extension-release.*",
]

View file

@ -0,0 +1,3 @@
[mappings]
# pacman hooks
"INI" = ["/usr/share/libalpm/hooks/*.hook", "/etc/pacman.d/hooks/*.hook"]

View file

@ -0,0 +1,21 @@
[mappings]
"INI" = [
"**/systemd/**/*.conf",
"**/systemd/**/*.example",
"*.automount",
"*.device",
"*.dnssd",
"*.link",
"*.mount",
"*.netdev",
"*.network",
"*.nspawn",
"*.path",
"*.service",
"*.scope",
"*.slice",
"*.socket",
"*.swap",
"*.target",
"*.timer",
]

View file

@ -0,0 +1,2 @@
[mappings]
"Apache Conf" = ["/etc/apache2/**/*.conf", "/etc/apache2/sites-*/**/*"]

View file

@ -0,0 +1,2 @@
[mappings]
"YAML" = ["fish_history"]

View file

@ -0,0 +1,3 @@
# KornShell is backward-compatible with the Bourne shell #2633
[mappings]
"Bourne Again Shell (bash)" = ["*.ksh"]

View file

@ -0,0 +1,2 @@
[mappings]
"Email" = ["/var/spool/mail/*", "/var/mail/*"]

View file

@ -0,0 +1,2 @@
[mappings]
"nginx" = ["/etc/nginx/**/*.conf", "/etc/nginx/sites-*/**/*"]

View file

@ -0,0 +1,5 @@
[mappings]
"Bourne Again Shell (bash)" = [
# used by lots of shells
"/etc/profile",
]

View file

@ -1,4 +1,8 @@
use std::fmt::{Display, Formatter};
use std::{
fmt::{Display, Formatter},
iter::Peekable,
str::CharIndices,
};
// Wrapper to avoid unnecessary branching when input doesn't have ANSI escape sequences.
pub struct AnsiStyle {
@ -10,7 +14,7 @@ impl AnsiStyle {
AnsiStyle { attributes: None }
}
pub fn update(&mut self, sequence: &str) -> bool {
pub fn update(&mut self, sequence: EscapeSequence) -> bool {
match &mut self.attributes {
Some(a) => a.update(sequence),
None => {
@ -19,6 +23,13 @@ impl AnsiStyle {
}
}
}
pub fn to_reset_sequence(&mut self) -> String {
match &mut self.attributes {
Some(a) => a.to_reset_sequence(),
None => String::new(),
}
}
}
impl Display for AnsiStyle {
@ -31,6 +42,8 @@ impl Display for AnsiStyle {
}
struct Attributes {
has_sgr_sequences: bool,
foreground: String,
background: String,
underlined: String,
@ -61,11 +74,20 @@ struct Attributes {
/// ON: ^[9m
/// OFF: ^[29m
strike: String,
/// The hyperlink sequence.
/// FORMAT: \x1B]8;{ID};{URL}\e\\
///
/// `\e\\` may be replaced with BEL `\x07`.
/// Setting both {ID} and {URL} to an empty string represents no hyperlink.
hyperlink: String,
}
impl Attributes {
pub fn new() -> Self {
Attributes {
has_sgr_sequences: false,
foreground: "".to_owned(),
background: "".to_owned(),
underlined: "".to_owned(),
@ -76,34 +98,56 @@ impl Attributes {
underline: "".to_owned(),
italic: "".to_owned(),
strike: "".to_owned(),
hyperlink: "".to_owned(),
}
}
/// Update the attributes with an escape sequence.
/// Returns `false` if the sequence is unsupported.
pub fn update(&mut self, sequence: &str) -> bool {
let mut chars = sequence.char_indices().skip(1);
pub fn update(&mut self, sequence: EscapeSequence) -> bool {
use EscapeSequence::*;
match sequence {
Text(_) => return false,
Unknown(_) => { /* defer to update_with_unsupported */ }
OSC {
raw_sequence,
command,
..
} => {
if command.starts_with("8;") {
return self.update_with_hyperlink(raw_sequence);
}
/* defer to update_with_unsupported */
}
CSI {
final_byte,
parameters,
..
} => {
match final_byte {
"m" => return self.update_with_sgr(parameters),
_ => {
// NOTE(eth-p): We might want to ignore these, since they involve cursor or buffer manipulation.
/* defer to update_with_unsupported */
}
}
}
NF { nf_sequence, .. } => {
let mut iter = nf_sequence.chars();
match iter.next() {
Some('(') => return self.update_with_charset('(', iter),
Some(')') => return self.update_with_charset(')', iter),
_ => { /* defer to update_with_unsupported */ }
}
}
}
if let Some((_, t)) = chars.next() {
match t {
'(' => self.update_with_charset('(', chars.map(|(_, c)| c)),
')' => self.update_with_charset(')', chars.map(|(_, c)| c)),
'[' => {
if let Some((i, last)) = chars.last() {
// SAFETY: Always starts with ^[ and ends with m.
self.update_with_csi(last, &sequence[2..i])
} else {
false
}
}
_ => self.update_with_unsupported(sequence),
}
} else {
false
}
self.update_with_unsupported(sequence.raw())
}
fn sgr_reset(&mut self) {
self.has_sgr_sequences = false;
self.foreground.clear();
self.background.clear();
self.underlined.clear();
@ -121,6 +165,7 @@ impl Attributes {
.map(|p| p.parse::<u16>())
.map(|p| p.unwrap_or(0)); // Treat errors as 0.
self.has_sgr_sequences = true;
while let Some(p) = iter.next() {
match p {
0 => self.sgr_reset(),
@ -149,19 +194,23 @@ impl Attributes {
true
}
fn update_with_csi(&mut self, finalizer: char, sequence: &str) -> bool {
if finalizer == 'm' {
self.update_with_sgr(sequence)
} else {
false
}
}
fn update_with_unsupported(&mut self, sequence: &str) -> bool {
self.unknown_buffer.push_str(sequence);
false
}
fn update_with_hyperlink(&mut self, sequence: &str) -> bool {
if sequence == "8;;" {
// Empty hyperlink ID and HREF -> end of hyperlink.
self.hyperlink.clear();
} else {
self.hyperlink.clear();
self.hyperlink.push_str(sequence);
}
true
}
fn update_with_charset(&mut self, kind: char, set: impl Iterator<Item = char>) -> bool {
self.charset = format!("\x1B{}{}", kind, set.take(1).collect::<String>());
true
@ -179,13 +228,35 @@ impl Attributes {
_ => format!("\x1B[{}m", color),
}
}
/// Gets an ANSI escape sequence to reset all the known attributes.
pub fn to_reset_sequence(&self) -> String {
let mut buf = String::with_capacity(17);
// TODO: Enable me in a later pull request.
// if self.has_sgr_sequences {
// buf.push_str("\x1B[m");
// }
if !self.hyperlink.is_empty() {
buf.push_str("\x1B]8;;\x1B\\"); // Disable hyperlink.
}
// TODO: Enable me in a later pull request.
// if !self.charset.is_empty() {
// // https://espterm.github.io/docs/VT100%20escape%20codes.html
// buf.push_str("\x1B(B\x1B)B"); // setusg0 and setusg1
// }
buf
}
}
impl Display for Attributes {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}{}{}{}{}{}{}{}{}",
"{}{}{}{}{}{}{}{}{}{}",
self.foreground,
self.background,
self.underlined,
@ -195,6 +266,7 @@ impl Display for Attributes {
self.underline,
self.italic,
self.strike,
self.hyperlink,
)
}
}
@ -210,3 +282,612 @@ fn join(
.collect::<Vec<String>>()
.join(delimiter)
}
/// A range of indices for a raw ANSI escape sequence.
#[derive(Debug, PartialEq)]
enum EscapeSequenceOffsets {
Text {
start: usize,
end: usize,
},
Unknown {
start: usize,
end: usize,
},
NF {
// https://en.wikipedia.org/wiki/ANSI_escape_code#nF_Escape_sequences
start_sequence: usize,
start: usize,
end: usize,
},
OSC {
// https://en.wikipedia.org/wiki/ANSI_escape_code#OSC_(Operating_System_Command)_sequences
start_sequence: usize,
start_command: usize,
start_terminator: usize,
end: usize,
},
CSI {
// https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences
start_sequence: usize,
start_parameters: usize,
start_intermediates: usize,
start_final_byte: usize,
end: usize,
},
}
/// An iterator over the offests of ANSI/VT escape sequences within a string.
///
/// ## Example
///
/// ```ignore
/// let iter = EscapeSequenceOffsetsIterator::new("\x1B[33mThis is yellow text.\x1B[m");
/// ```
struct EscapeSequenceOffsetsIterator<'a> {
text: &'a str,
chars: Peekable<CharIndices<'a>>,
}
impl<'a> EscapeSequenceOffsetsIterator<'a> {
pub fn new(text: &'a str) -> EscapeSequenceOffsetsIterator<'a> {
return EscapeSequenceOffsetsIterator {
text,
chars: text.char_indices().peekable(),
};
}
/// Takes values from the iterator while the predicate returns true.
/// If the predicate returns false, that value is left.
fn chars_take_while(&mut self, pred: impl Fn(char) -> bool) -> Option<(usize, usize)> {
if self.chars.peek().is_none() {
return None;
}
let start = self.chars.peek().unwrap().0;
let mut end: usize = start;
while let Some((i, c)) = self.chars.peek() {
if !pred(*c) {
break;
}
end = *i + c.len_utf8();
self.chars.next();
}
Some((start, end))
}
fn next_text(&mut self) -> Option<EscapeSequenceOffsets> {
match self.chars_take_while(|c| c != '\x1B') {
None => None,
Some((start, end)) => Some(EscapeSequenceOffsets::Text { start, end }),
}
}
fn next_sequence(&mut self) -> Option<EscapeSequenceOffsets> {
let (start_sequence, c) = self.chars.next().expect("to not be finished");
match self.chars.peek() {
None => Some(EscapeSequenceOffsets::Unknown {
start: start_sequence,
end: start_sequence + c.len_utf8(),
}),
Some((_, ']')) => self.next_osc(start_sequence),
Some((_, '[')) => self.next_csi(start_sequence),
Some((i, c)) => match c {
'\x20'..='\x2F' => self.next_nf(start_sequence),
c => Some(EscapeSequenceOffsets::Unknown {
start: start_sequence,
end: i + c.len_utf8(),
}),
},
}
}
fn next_osc(&mut self, start_sequence: usize) -> Option<EscapeSequenceOffsets> {
let (osc_open_index, osc_open_char) = self.chars.next().expect("to not be finished");
debug_assert_eq!(osc_open_char, ']');
let mut start_terminator: usize;
let mut end_sequence: usize;
loop {
match self.chars_take_while(|c| !matches!(c, '\x07' | '\x1B')) {
None => {
start_terminator = self.text.len();
end_sequence = start_terminator;
break;
}
Some((_, end)) => {
start_terminator = end;
end_sequence = end;
}
}
match self.chars.next() {
Some((ti, '\x07')) => {
end_sequence = ti + '\x07'.len_utf8();
break;
}
Some((ti, '\x1B')) => {
match self.chars.next() {
Some((i, '\\')) => {
end_sequence = i + '\\'.len_utf8();
break;
}
None => {
end_sequence = ti + '\x1B'.len_utf8();
break;
}
_ => {
// Repeat, since `\\`(anything) isn't a valid ST.
}
}
}
None => {
// Prematurely ends.
break;
}
Some((_, tc)) => {
panic!("this should not be reached: char {:?}", tc)
}
}
}
Some(EscapeSequenceOffsets::OSC {
start_sequence,
start_command: osc_open_index + osc_open_char.len_utf8(),
start_terminator: start_terminator,
end: end_sequence,
})
}
fn next_csi(&mut self, start_sequence: usize) -> Option<EscapeSequenceOffsets> {
let (csi_open_index, csi_open_char) = self.chars.next().expect("to not be finished");
debug_assert_eq!(csi_open_char, '[');
let start_parameters: usize = csi_open_index + csi_open_char.len_utf8();
// Keep iterating while within the range of `0x30-0x3F`.
let mut start_intermediates: usize = start_parameters;
if let Some((_, end)) = self.chars_take_while(|c| matches!(c, '\x30'..='\x3F')) {
start_intermediates = end;
}
// Keep iterating while within the range of `0x20-0x2F`.
let mut start_final_byte: usize = start_intermediates;
if let Some((_, end)) = self.chars_take_while(|c| matches!(c, '\x20'..='\x2F')) {
start_final_byte = end;
}
// Take the last char.
let end_of_sequence = match self.chars.next() {
None => start_final_byte,
Some((i, c)) => i + c.len_utf8(),
};
Some(EscapeSequenceOffsets::CSI {
start_sequence,
start_parameters,
start_intermediates,
start_final_byte,
end: end_of_sequence,
})
}
fn next_nf(&mut self, start_sequence: usize) -> Option<EscapeSequenceOffsets> {
let (nf_open_index, nf_open_char) = self.chars.next().expect("to not be finished");
debug_assert!(matches!(nf_open_char, '\x20'..='\x2F'));
let start: usize = nf_open_index;
let mut end: usize = start;
// Keep iterating while within the range of `0x20-0x2F`.
match self.chars_take_while(|c| matches!(c, '\x20'..='\x2F')) {
Some((_, i)) => end = i,
None => {
return Some(EscapeSequenceOffsets::NF {
start_sequence,
start,
end,
})
}
}
// Get the final byte.
match self.chars.next() {
Some((i, c)) => end = i + c.len_utf8(),
None => {}
}
Some(EscapeSequenceOffsets::NF {
start_sequence,
start,
end,
})
}
}
impl<'a> Iterator for EscapeSequenceOffsetsIterator<'a> {
type Item = EscapeSequenceOffsets;
fn next(&mut self) -> Option<Self::Item> {
match self.chars.peek() {
Some((_, '\x1B')) => self.next_sequence(),
Some((_, _)) => self.next_text(),
None => None,
}
}
}
/// An iterator over ANSI/VT escape sequences within a string.
///
/// ## Example
///
/// ```ignore
/// let iter = EscapeSequenceIterator::new("\x1B[33mThis is yellow text.\x1B[m");
/// ```
pub struct EscapeSequenceIterator<'a> {
text: &'a str,
offset_iter: EscapeSequenceOffsetsIterator<'a>,
}
impl<'a> EscapeSequenceIterator<'a> {
pub fn new(text: &'a str) -> EscapeSequenceIterator<'a> {
return EscapeSequenceIterator {
text,
offset_iter: EscapeSequenceOffsetsIterator::new(text),
};
}
}
impl<'a> Iterator for EscapeSequenceIterator<'a> {
type Item = EscapeSequence<'a>;
fn next(&mut self) -> Option<Self::Item> {
use EscapeSequenceOffsets::*;
self.offset_iter.next().map(|offsets| match offsets {
Unknown { start, end } => EscapeSequence::Unknown(&self.text[start..end]),
Text { start, end } => EscapeSequence::Text(&self.text[start..end]),
NF {
start_sequence,
start,
end,
} => EscapeSequence::NF {
raw_sequence: &self.text[start_sequence..end],
nf_sequence: &self.text[start..end],
},
OSC {
start_sequence,
start_command,
start_terminator,
end,
} => EscapeSequence::OSC {
raw_sequence: &self.text[start_sequence..end],
command: &self.text[start_command..start_terminator],
terminator: &self.text[start_terminator..end],
},
CSI {
start_sequence,
start_parameters,
start_intermediates,
start_final_byte,
end,
} => EscapeSequence::CSI {
raw_sequence: &self.text[start_sequence..end],
parameters: &self.text[start_parameters..start_intermediates],
intermediates: &self.text[start_intermediates..start_final_byte],
final_byte: &self.text[start_final_byte..end],
},
})
}
}
/// A parsed ANSI/VT100 escape sequence.
#[derive(Debug, PartialEq)]
pub enum EscapeSequence<'a> {
Text(&'a str),
Unknown(&'a str),
NF {
raw_sequence: &'a str,
nf_sequence: &'a str,
},
OSC {
raw_sequence: &'a str,
command: &'a str,
terminator: &'a str,
},
CSI {
raw_sequence: &'a str,
parameters: &'a str,
intermediates: &'a str,
final_byte: &'a str,
},
}
impl<'a> EscapeSequence<'a> {
pub fn raw(&self) -> &'a str {
use EscapeSequence::*;
match *self {
Text(raw) => raw,
Unknown(raw) => raw,
NF { raw_sequence, .. } => raw_sequence,
OSC { raw_sequence, .. } => raw_sequence,
CSI { raw_sequence, .. } => raw_sequence,
}
}
}
#[cfg(test)]
mod tests {
use crate::vscreen::{
EscapeSequence, EscapeSequenceIterator, EscapeSequenceOffsets,
EscapeSequenceOffsetsIterator,
};
#[test]
fn test_escape_sequence_offsets_iterator_parses_text() {
let mut iter = EscapeSequenceOffsetsIterator::new("text");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::Text { start: 0, end: 4 })
);
}
#[test]
fn test_escape_sequence_offsets_iterator_parses_text_stops_at_esc() {
let mut iter = EscapeSequenceOffsetsIterator::new("text\x1B[ming");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::Text { start: 0, end: 4 })
);
}
#[test]
fn test_escape_sequence_offsets_iterator_parses_osc_with_bel() {
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B]abc\x07");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::OSC {
start_sequence: 0,
start_command: 2,
start_terminator: 5,
end: 6,
})
);
}
#[test]
fn test_escape_sequence_offsets_iterator_parses_osc_with_st() {
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B]abc\x1B\\");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::OSC {
start_sequence: 0,
start_command: 2,
start_terminator: 5,
end: 7,
})
);
}
#[test]
fn test_escape_sequence_offsets_iterator_parses_osc_thats_broken() {
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B]ab");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::OSC {
start_sequence: 0,
start_command: 2,
start_terminator: 4,
end: 4,
})
);
}
#[test]
fn test_escape_sequence_offsets_iterator_parses_csi() {
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[m");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::CSI {
start_sequence: 0,
start_parameters: 2,
start_intermediates: 2,
start_final_byte: 2,
end: 3
})
);
}
#[test]
fn test_escape_sequence_offsets_iterator_parses_csi_with_parameters() {
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1;34m");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::CSI {
start_sequence: 0,
start_parameters: 2,
start_intermediates: 6,
start_final_byte: 6,
end: 7
})
);
}
#[test]
fn test_escape_sequence_offsets_iterator_parses_csi_with_intermediates() {
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[$m");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::CSI {
start_sequence: 0,
start_parameters: 2,
start_intermediates: 2,
start_final_byte: 3,
end: 4
})
);
}
#[test]
fn test_escape_sequence_offsets_iterator_parses_csi_with_parameters_and_intermediates() {
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1$m");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::CSI {
start_sequence: 0,
start_parameters: 2,
start_intermediates: 3,
start_final_byte: 4,
end: 5
})
);
}
#[test]
fn test_escape_sequence_offsets_iterator_parses_csi_thats_broken() {
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::CSI {
start_sequence: 0,
start_parameters: 2,
start_intermediates: 2,
start_final_byte: 2,
end: 2
})
);
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::CSI {
start_sequence: 0,
start_parameters: 2,
start_intermediates: 3,
start_final_byte: 3,
end: 3
})
);
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1$");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::CSI {
start_sequence: 0,
start_parameters: 2,
start_intermediates: 3,
start_final_byte: 4,
end: 4
})
);
}
#[test]
fn test_escape_sequence_offsets_iterator_parses_nf() {
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B($0");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::NF {
start_sequence: 0,
start: 1,
end: 4
})
);
}
#[test]
fn test_escape_sequence_offsets_iterator_parses_nf_thats_broken() {
let mut iter = EscapeSequenceOffsetsIterator::new("\x1B(");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::NF {
start_sequence: 0,
start: 1,
end: 1
})
);
}
#[test]
fn test_escape_sequence_offsets_iterator_iterates() {
let mut iter = EscapeSequenceOffsetsIterator::new("text\x1B[33m\x1B]OSC\x07\x1B(0");
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::Text { start: 0, end: 4 })
);
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::CSI {
start_sequence: 4,
start_parameters: 6,
start_intermediates: 8,
start_final_byte: 8,
end: 9
})
);
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::OSC {
start_sequence: 9,
start_command: 11,
start_terminator: 14,
end: 15
})
);
assert_eq!(
iter.next(),
Some(EscapeSequenceOffsets::NF {
start_sequence: 15,
start: 16,
end: 18
})
);
assert_eq!(iter.next(), None);
}
#[test]
fn test_escape_sequence_iterator_iterates() {
let mut iter = EscapeSequenceIterator::new("text\x1B[33m\x1B]OSC\x07\x1B]OSC\x1B\\\x1B(0");
assert_eq!(iter.next(), Some(EscapeSequence::Text("text")));
assert_eq!(
iter.next(),
Some(EscapeSequence::CSI {
raw_sequence: "\x1B[33m",
parameters: "33",
intermediates: "",
final_byte: "m",
})
);
assert_eq!(
iter.next(),
Some(EscapeSequence::OSC {
raw_sequence: "\x1B]OSC\x07",
command: "OSC",
terminator: "\x07",
})
);
assert_eq!(
iter.next(),
Some(EscapeSequence::OSC {
raw_sequence: "\x1B]OSC\x1B\\",
command: "OSC",
terminator: "\x1B\\",
})
);
assert_eq!(
iter.next(),
Some(EscapeSequence::NF {
raw_sequence: "\x1B(0",
nf_sequence: "(0",
})
);
assert_eq!(iter.next(), None);
}
}

View file

@ -9,6 +9,13 @@ if ! command -v hyperfine > /dev/null 2>&1; then
exit 1
fi
# Check that jq is installed.
if ! command -v jq > /dev/null 2>&1; then
echo "'jq' does not seem to be installed."
echo "You can get it here: https://jqlang.github.io/jq/download/"
exit 1
fi
# Check that python3 is installed.
if ! command -v python3 > /dev/null 2>&1; then
echo "'python3' does not seem to be installed."
@ -95,10 +102,20 @@ hyperfine \
cat "$RESULT_DIR/startup-time.md" >> "$REPORT"
heading "Startup time without syntax highlighting"
hyperfine \
"$(printf "%q" "$BAT") --no-config startup-time-src/small-CpuInfo-file.cpuinfo" \
--command-name "bat … small-CpuInfo-file.cpuinfo" \
--warmup "$WARMUP_COUNT" \
--runs "$RUN_COUNT" \
--export-markdown "$RESULT_DIR/startup-time-without-syntax-highlighting.md" \
--export-json "$RESULT_DIR/startup-time-without-syntax-highlighting.json"
cat "$RESULT_DIR/startup-time-without-syntax-highlighting.md" >> "$REPORT"
heading "Startup time with syntax highlighting"
hyperfine \
"$(printf "%q" "$BAT") --no-config --color=always startup-time-src/small-CpuInfo-file.cpuinfo" \
--command-name "bat … small-CpuInfo-file.cpuinfo" \
--command-name "bat … --color=always small-CpuInfo-file.cpuinfo" \
--warmup "$WARMUP_COUNT" \
--runs "$RUN_COUNT" \
--export-markdown "$RESULT_DIR/startup-time-with-syntax-highlighting.md" \
@ -117,6 +134,40 @@ hyperfine \
cat "$RESULT_DIR/startup-time-with-syntax-with-dependencies.md" >> "$REPORT"
heading "Startup time with indeterminant syntax"
hyperfine \
"$(printf "%q" "$BAT") --no-config --color=always startup-time-src/mystery-file" \
--shell none \
--command-name 'bat … mystery-file' \
--warmup "$WARMUP_COUNT" \
--runs "$RUN_COUNT" \
--export-markdown "$RESULT_DIR/startup-time-with-indeterminant-syntax.md" \
--export-json "$RESULT_DIR/startup-time-with-indeterminant-syntax.json"
cat "$RESULT_DIR/startup-time-with-indeterminant-syntax.md" >> "$REPORT"
heading "Startup time with manually set syntax"
hyperfine \
"$(printf "%q" "$BAT") --no-config --color=always --language=Dockerfile startup-time-src/mystery-file" \
--shell none \
--command-name 'bat … --language=Dockerfile mystery-file' \
--warmup "$WARMUP_COUNT" \
--runs "$RUN_COUNT" \
--export-markdown "$RESULT_DIR/startup-time-with-manually-set-syntax.md" \
--export-json "$RESULT_DIR/startup-time-with-manually-set-syntax.json"
cat "$RESULT_DIR/startup-time-with-manually-set-syntax.md" >> "$REPORT"
heading "Startup time with mapped syntax"
hyperfine \
"$(printf "%q" "$BAT") --no-config --color=always startup-time-src/Containerfile" \
--shell none \
--command-name 'bat … Containerfile' \
--warmup "$WARMUP_COUNT" \
--runs "$RUN_COUNT" \
--export-markdown "$RESULT_DIR/startup-time-with-mapped-syntax.md" \
--export-json "$RESULT_DIR/startup-time-with-mapped-syntax.json"
cat "$RESULT_DIR/startup-time-with-mapped-syntax.md" >> "$REPORT"
heading "Plain-text speed"
hyperfine \
"$(printf "%q" "$BAT") --no-config --language=txt --style=plain highlighting-speed-src/numpy_test_multiarray.py" \

View file

@ -0,0 +1,3 @@
FROM docker.io/alpine:latest
COPY foo /root/bar
RUN sleep 60

View file

@ -0,0 +1,3 @@
FROM docker.io/alpine:latest
COPY foo /root/bar
RUN sleep 60

View file

@ -0,0 +1 @@
]8;;http://example.com\This is a link]8;;\n

View file

@ -0,0 +1 @@
The header is not broken

View file

@ -1175,6 +1175,20 @@ fn bom_stripped_when_no_color_and_not_loop_through() {
);
}
// Regression test for https://github.com/sharkdp/bat/issues/2541
#[test]
fn no_broken_osc_emit_with_line_wrapping() {
bat()
.arg("--color=always")
.arg("--decorations=never")
.arg("--wrap=character")
.arg("--terminal-width=40")
.arg("regression_tests/issue_2541.txt")
.assert()
.success()
.stdout(predicate::function(|s: &str| s.lines().count() == 1));
}
#[test]
fn can_print_file_named_cache() {
bat_with_config()
@ -1393,6 +1407,61 @@ fn header_full_binary() {
.stderr("");
}
#[test]
#[cfg(not(feature = "git"))]
fn header_narrow_terminal() {
bat()
.arg("--terminal-width=30")
.arg("--decorations=always")
.arg("this-file-path-is-really-long-and-would-have-broken-the-layout-of-the-header.txt")
.assert()
.success()
.stdout(
"\
File: this-file-path-is
-really-long-and-would-
have-broken-the-layout-
of-the-header.txt
1 The header is not broke
n
",
)
.stderr("");
}
#[test]
fn header_very_narrow_terminal() {
bat()
.arg("--terminal-width=10")
.arg("--decorations=always")
.arg("this-file-path-is-really-long-and-would-have-broken-the-layout-of-the-header.txt")
.assert()
.success()
.stdout(
"\
File: this
-file-path
-is-really
-long-and-
would-have
-broken-th
e-layout-o
f-the-head
er.txt
The header
is not br
oken
",
)
.stderr("");
}
#[test]
#[cfg(feature = "git")] // Expected output assumes git is enabled
fn header_default() {
@ -1876,6 +1945,62 @@ fn ansi_passthrough_emit() {
}
}
// Ensure that a simple ANSI sequence passthrough is emitted properly on wrapped lines.
// This also helps ensure that escape sequences are counted as part of the visible characters when wrapping.
#[test]
fn ansi_sgr_emitted_when_wrapped() {
bat()
.arg("--paging=never")
.arg("--color=never")
.arg("--terminal-width=20")
.arg("--wrap=character")
.arg("--decorations=always")
.arg("--style=plain")
.write_stdin("\x1B[33mColor...............Also color.\n")
.assert()
.success()
.stdout("\x1B[33m\x1B[33mColor...............\n\x1B[33mAlso color.\n")
// FIXME: ~~~~~~~~ should not be emitted twice.
.stderr("");
}
// Ensure that a simple ANSI sequence passthrough is emitted properly on wrapped lines.
// This also helps ensure that escape sequences are counted as part of the visible characters when wrapping.
#[test]
fn ansi_hyperlink_emitted_when_wrapped() {
bat()
.arg("--paging=never")
.arg("--color=never")
.arg("--terminal-width=20")
.arg("--wrap=character")
.arg("--decorations=always")
.arg("--style=plain")
.write_stdin("\x1B]8;;http://example.com/\x1B\\Hyperlinks..........Wrap across lines.\n")
.assert()
.success()
.stdout("\x1B]8;;http://example.com/\x1B\\\x1B]8;;http://example.com/\x1B\\Hyperlinks..........\x1B]8;;\x1B\\\n\x1B]8;;http://example.com/\x1B\\Wrap across lines.\n")
// FIXME: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ should not be emitted twice.
.stderr("");
}
// Ensure that multiple ANSI sequence SGR attributes are combined when emitted on wrapped lines.
#[test]
fn ansi_sgr_joins_attributes_when_wrapped() {
bat()
.arg("--paging=never")
.arg("--color=never")
.arg("--terminal-width=20")
.arg("--wrap=character")
.arg("--decorations=always")
.arg("--style=plain")
.write_stdin("\x1B[33mColor. \x1B[1mBold.........Also bold and color.\n")
.assert()
.success()
.stdout("\x1B[33m\x1B[33mColor. \x1B[1m\x1B[33m\x1B[1mBold.........\n\x1B[33m\x1B[1mAlso bold and color.\n")
// FIXME: ~~~~~~~~ ~~~~~~~~~~~~~~~ should not be emitted twice.
.stderr("");
}
#[test]
fn ignored_suffix_arg() {
bat()

View file

@ -1,31 +1,31 @@
import "../imported-file" ;
# With Comments !
def weird($a; $b; $c):
 [ $a, $b, $c ] | transpose | reduce .[][] as $item (
 [];
 . + $item.property
 )
def weird($a; $b; $c):
 [ $a, $b, $c ] | transpose | reduce .[][] as $item (
 [];
 . + $item.property
 )
;
. | weird (.a; .b; .c) |
. | weird (.a; .b; .c) |
(
if (. | contains("never") ) then
if (. | contains("never") ) then
 "Why yes"
else
 12.23
end
) as $never |
) as $never |
{
 hello,
 why: "because",
 hello: ( weird | ascii_upcase ),
 format_eg: ( . | @json "My json string \( . | this | part | just | white | ascii_upcase | transpose)" ),
 never: $never,
 hello: ( weird | ascii_upcase ),
 format_eg: ( . | @json "My json string \( . | this | part | just | white | ascii_upcase | transpose)" ),
 never: $never,
 "literal_key": literal_value,
 "this": 12.1e12,
 "part": "almost"
@ -38,8 +38,8 @@
 similar: "but not quite"
 }
 }
 ],
} | (
 ],
} | (
 
 # And with very basic brace matching
 
@ -47,13 +47,13 @@
 ] 
 
 # Other invalid ends
 ( [ } ] )
 ( [ } ] )
 # A "valid" sequence
 ( [ { key: () , other_key:( [ [] [[]] ] ), gaga } ] )
 ( [ { key: () , other_key:( [ [] [[]] ] ), gaga } ] )
 # A "invalid" sequence
 ( [ { key: () , other_key:( [ [] [[] ] ), gaga } ] )
 ( [ { key: () , other_key:( [ [] [[] ] ), gaga } ] )
 "A string\n whith escaped characters \" because we can"
)