mirror of
https://github.com/sharkdp/bat
synced 2024-11-24 04:43:07 +00:00
Extract some private submodules from 'bat::assets' (#1850)
This commit is contained in:
parent
6226eba52a
commit
e84b702309
7 changed files with 166 additions and 126 deletions
142
src/assets.rs
142
src/assets.rs
|
@ -1,7 +1,6 @@
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::ffi::OsStr;
|
use std::ffi::OsStr;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::Path;
|
||||||
|
|
||||||
use lazycell::LazyCell;
|
use lazycell::LazyCell;
|
||||||
|
|
||||||
|
@ -15,17 +14,26 @@ use crate::error::*;
|
||||||
use crate::input::{InputReader, OpenedInput, OpenedInputKind};
|
use crate::input::{InputReader, OpenedInput, OpenedInputKind};
|
||||||
use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
|
use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
|
||||||
|
|
||||||
|
use ignored_suffixes::*;
|
||||||
|
use minimal_assets::*;
|
||||||
|
use serialized_syntax_set::*;
|
||||||
|
|
||||||
|
#[cfg(feature = "build-assets")]
|
||||||
|
pub use crate::assets::build_assets::*;
|
||||||
|
|
||||||
|
pub(crate) mod assets_metadata;
|
||||||
|
#[cfg(feature = "build-assets")]
|
||||||
|
mod build_assets;
|
||||||
|
mod ignored_suffixes;
|
||||||
|
mod minimal_assets;
|
||||||
|
mod serialized_syntax_set;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct HighlightingAssets {
|
pub struct HighlightingAssets {
|
||||||
syntax_set_cell: LazyCell<SyntaxSet>,
|
syntax_set_cell: LazyCell<SyntaxSet>,
|
||||||
serialized_syntax_set: SerializedSyntaxSet,
|
serialized_syntax_set: SerializedSyntaxSet,
|
||||||
|
|
||||||
minimal_syntaxes: MinimalSyntaxes,
|
minimal_assets: MinimalAssets,
|
||||||
|
|
||||||
/// Lazily load serialized [SyntaxSet]s from [Self.minimal_syntaxes]. The
|
|
||||||
/// index in this vec matches the index in
|
|
||||||
/// [Self.minimal_syntaxes.serialized_syntax_sets]
|
|
||||||
deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>,
|
|
||||||
|
|
||||||
theme_set: ThemeSet,
|
theme_set: ThemeSet,
|
||||||
fallback_theme: Option<&'static str>,
|
fallback_theme: Option<&'static str>,
|
||||||
|
@ -37,22 +45,6 @@ pub struct SyntaxReferenceInSet<'a> {
|
||||||
pub syntax_set: &'a SyntaxSet,
|
pub syntax_set: &'a SyntaxSet,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are
|
|
||||||
/// stored in serialized form, and are deserialized on-demand. This gives good
|
|
||||||
/// startup performance since only the necessary [SyntaxReference]s needs to be
|
|
||||||
/// deserialized.
|
|
||||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
|
||||||
pub(crate) struct MinimalSyntaxes {
|
|
||||||
/// Lookup the index into `serialized_syntax_sets` of a [SyntaxSet] by the
|
|
||||||
/// name of any [SyntaxReference] inside the [SyntaxSet]
|
|
||||||
/// (We will later add `by_extension`, `by_first_line`, etc.)
|
|
||||||
pub(crate) by_name: HashMap<String, usize>,
|
|
||||||
|
|
||||||
/// Serialized [SyntaxSet]s. Whether or not this data is compressed is
|
|
||||||
/// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES]
|
|
||||||
pub(crate) serialized_syntax_sets: Vec<Vec<u8>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time
|
// Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time
|
||||||
pub(crate) const COMPRESS_SYNTAXES: bool = true;
|
pub(crate) const COMPRESS_SYNTAXES: bool = true;
|
||||||
|
|
||||||
|
@ -70,41 +62,16 @@ pub(crate) const COMPRESS_SERIALIZED_MINIMAL_SYNTAXES: bool = true;
|
||||||
// efficient byte-by-byte copy of `serialized_syntax_sets`.
|
// efficient byte-by-byte copy of `serialized_syntax_sets`.
|
||||||
pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false;
|
pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false;
|
||||||
|
|
||||||
const IGNORED_SUFFIXES: [&str; 13] = [
|
|
||||||
// Editor etc backups
|
|
||||||
"~",
|
|
||||||
".bak",
|
|
||||||
".old",
|
|
||||||
".orig",
|
|
||||||
// Debian and derivatives apt/dpkg/ucf backups
|
|
||||||
".dpkg-dist",
|
|
||||||
".dpkg-old",
|
|
||||||
".ucf-dist",
|
|
||||||
".ucf-new",
|
|
||||||
".ucf-old",
|
|
||||||
// Red Hat and derivatives rpm backups
|
|
||||||
".rpmnew",
|
|
||||||
".rpmorig",
|
|
||||||
".rpmsave",
|
|
||||||
// Build system input/template files
|
|
||||||
".in",
|
|
||||||
];
|
|
||||||
|
|
||||||
impl HighlightingAssets {
|
impl HighlightingAssets {
|
||||||
fn new(
|
fn new(
|
||||||
serialized_syntax_set: SerializedSyntaxSet,
|
serialized_syntax_set: SerializedSyntaxSet,
|
||||||
minimal_syntaxes: MinimalSyntaxes,
|
minimal_syntaxes: MinimalSyntaxes,
|
||||||
theme_set: ThemeSet,
|
theme_set: ThemeSet,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
// Prepare so we can lazily load minimal syntaxes without a mut reference
|
|
||||||
let deserialized_minimal_syntaxes =
|
|
||||||
vec![LazyCell::new(); minimal_syntaxes.serialized_syntax_sets.len()];
|
|
||||||
|
|
||||||
HighlightingAssets {
|
HighlightingAssets {
|
||||||
syntax_set_cell: LazyCell::new(),
|
syntax_set_cell: LazyCell::new(),
|
||||||
serialized_syntax_set,
|
serialized_syntax_set,
|
||||||
deserialized_minimal_syntaxes,
|
minimal_assets: MinimalAssets::new(minimal_syntaxes),
|
||||||
minimal_syntaxes,
|
|
||||||
theme_set,
|
theme_set,
|
||||||
fallback_theme: None,
|
fallback_theme: None,
|
||||||
}
|
}
|
||||||
|
@ -167,37 +134,12 @@ impl HighlightingAssets {
|
||||||
/// tries to find a minimal [SyntaxSet]. If none is found, returns the
|
/// tries to find a minimal [SyntaxSet]. If none is found, returns the
|
||||||
/// [SyntaxSet] that contains all syntaxes.
|
/// [SyntaxSet] that contains all syntaxes.
|
||||||
fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> {
|
fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> {
|
||||||
let minimal_syntax_set = self
|
match self.minimal_assets.get_syntax_set_by_name(name) {
|
||||||
.minimal_syntaxes
|
|
||||||
.by_name
|
|
||||||
.get(&name.to_ascii_lowercase())
|
|
||||||
.and_then(|index| self.get_minimal_syntax_set_with_index(*index));
|
|
||||||
|
|
||||||
match minimal_syntax_set {
|
|
||||||
Some(syntax_set) => Ok(syntax_set),
|
Some(syntax_set) => Ok(syntax_set),
|
||||||
None => self.get_syntax_set(),
|
None => self.get_syntax_set(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> {
|
|
||||||
let serialized_syntax_set = &self.minimal_syntaxes.serialized_syntax_sets[index];
|
|
||||||
asset_from_contents(
|
|
||||||
&serialized_syntax_set[..],
|
|
||||||
&format!("minimal syntax set {}", index),
|
|
||||||
COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
|
|
||||||
)
|
|
||||||
.map_err(|_| format!("Could not parse minimal syntax set {}", index).into())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> {
|
|
||||||
self.deserialized_minimal_syntaxes
|
|
||||||
.get(index)
|
|
||||||
.and_then(|cell| {
|
|
||||||
cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index))
|
|
||||||
.ok()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Use [Self::get_syntax_for_file_name] instead
|
/// Use [Self::get_syntax_for_file_name] instead
|
||||||
#[deprecated]
|
#[deprecated]
|
||||||
pub fn syntax_for_file_name(
|
pub fn syntax_for_file_name(
|
||||||
|
@ -319,7 +261,9 @@ impl HighlightingAssets {
|
||||||
syntax = self.find_syntax_by_file_name_extension(file_name)?;
|
syntax = self.find_syntax_by_file_name_extension(file_name)?;
|
||||||
}
|
}
|
||||||
if syntax.is_none() {
|
if syntax.is_none() {
|
||||||
syntax = self.get_extension_syntax_with_stripped_suffix(file_name)?;
|
syntax = try_with_stripped_suffix(file_name, |stripped_file_name| {
|
||||||
|
self.get_extension_syntax(stripped_file_name) // Note: recursion
|
||||||
|
})?;
|
||||||
}
|
}
|
||||||
Ok(syntax)
|
Ok(syntax)
|
||||||
}
|
}
|
||||||
|
@ -340,25 +284,6 @@ impl HighlightingAssets {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If we find an ignored suffix on the file name, e.g. '~', we strip it and
|
|
||||||
/// then try again to find a syntax without it. Note that we do this recursively.
|
|
||||||
fn get_extension_syntax_with_stripped_suffix(
|
|
||||||
&self,
|
|
||||||
file_name: &OsStr,
|
|
||||||
) -> Result<Option<SyntaxReferenceInSet>> {
|
|
||||||
let file_path = Path::new(file_name);
|
|
||||||
let mut syntax = None;
|
|
||||||
if let Some(file_str) = file_path.to_str() {
|
|
||||||
for suffix in &IGNORED_SUFFIXES {
|
|
||||||
if let Some(stripped_filename) = file_str.strip_suffix(suffix) {
|
|
||||||
syntax = self.get_extension_syntax(OsStr::new(stripped_filename))?;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(syntax)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_first_line_syntax(
|
fn get_first_line_syntax(
|
||||||
&self,
|
&self,
|
||||||
reader: &mut InputReader,
|
reader: &mut InputReader,
|
||||||
|
@ -371,31 +296,6 @@ impl HighlightingAssets {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "build-assets")]
|
|
||||||
pub use crate::build_assets::build_assets as build;
|
|
||||||
|
|
||||||
/// A SyntaxSet in serialized form, i.e. bincoded and flate2 compressed.
|
|
||||||
/// We keep it in this format since we want to load it lazily.
|
|
||||||
#[derive(Debug)]
|
|
||||||
enum SerializedSyntaxSet {
|
|
||||||
/// The data comes from a user-generated cache file.
|
|
||||||
FromFile(PathBuf),
|
|
||||||
|
|
||||||
/// The data to use is embedded into the bat binary.
|
|
||||||
FromBinary(&'static [u8]),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SerializedSyntaxSet {
|
|
||||||
fn deserialize(&self) -> Result<SyntaxSet> {
|
|
||||||
match self {
|
|
||||||
SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)),
|
|
||||||
SerializedSyntaxSet::FromFile(ref path) => {
|
|
||||||
asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
|
pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
|
||||||
include_bytes!("../assets/syntaxes.bin")
|
include_bytes!("../assets/syntaxes.bin")
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,7 +7,6 @@ use syntect::parsing::syntax_definition::{
|
||||||
use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder};
|
use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder};
|
||||||
|
|
||||||
use crate::assets::*;
|
use crate::assets::*;
|
||||||
use crate::error::*;
|
|
||||||
|
|
||||||
type SyntaxName = String;
|
type SyntaxName = String;
|
||||||
|
|
||||||
|
@ -27,7 +26,7 @@ enum Dependency {
|
||||||
ByScope(Scope),
|
ByScope(Scope),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build_assets(
|
pub fn build(
|
||||||
source_dir: &Path,
|
source_dir: &Path,
|
||||||
include_integrated_assets: bool,
|
include_integrated_assets: bool,
|
||||||
target_dir: &Path,
|
target_dir: &Path,
|
42
src/assets/ignored_suffixes.rs
Normal file
42
src/assets/ignored_suffixes.rs
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
use std::ffi::OsStr;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use crate::error::*;
|
||||||
|
|
||||||
|
/// File-name suffixes that mark a file as a backup or template copy of
/// another file. Entries are tried in the order listed here.
const IGNORED_SUFFIXES: [&str; 13] = [
    // Backup copies left behind by editors and similar tools
    "~",
    ".bak",
    ".old",
    ".orig",
    // Backups created by apt/dpkg/ucf on Debian and its derivatives
    ".dpkg-dist",
    ".dpkg-old",
    ".ucf-dist",
    ".ucf-new",
    ".ucf-old",
    // Backups created by rpm on Red Hat and its derivatives
    ".rpmnew",
    ".rpmorig",
    ".rpmsave",
    // Input/template files consumed by build systems
    ".in",
];
|
||||||
|
|
||||||
|
/// If we find an ignored suffix on the file name, e.g. '~', we strip it and
|
||||||
|
/// then try again without it.
|
||||||
|
pub fn try_with_stripped_suffix<T, F>(file_name: &OsStr, func: F) -> Result<Option<T>>
|
||||||
|
where
|
||||||
|
F: Fn(&OsStr) -> Result<Option<T>>,
|
||||||
|
{
|
||||||
|
let mut from_stripped = None;
|
||||||
|
if let Some(file_str) = Path::new(file_name).to_str() {
|
||||||
|
for suffix in &IGNORED_SUFFIXES {
|
||||||
|
if let Some(stripped_filename) = file_str.strip_suffix(suffix) {
|
||||||
|
from_stripped = func(OsStr::new(stripped_filename))?;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(from_stripped)
|
||||||
|
}
|
72
src/assets/minimal_assets.rs
Normal file
72
src/assets/minimal_assets.rs
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use lazycell::LazyCell;
|
||||||
|
|
||||||
|
use syntect::parsing::SyntaxSet;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct MinimalAssets {
|
||||||
|
minimal_syntaxes: MinimalSyntaxes,
|
||||||
|
|
||||||
|
/// Lazily load serialized [SyntaxSet]s from [Self.minimal_syntaxes]. The
|
||||||
|
/// index in this vec matches the index in
|
||||||
|
/// [Self.minimal_syntaxes.serialized_syntax_sets]
|
||||||
|
deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are
|
||||||
|
/// stored in serialized form, and are deserialized on-demand. This gives good
|
||||||
|
/// startup performance since only the necessary [SyntaxReference]s needs to be
|
||||||
|
/// deserialized.
|
||||||
|
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||||
|
pub(crate) struct MinimalSyntaxes {
|
||||||
|
/// Lookup the index into `serialized_syntax_sets` of a [SyntaxSet] by the
|
||||||
|
/// name of any [SyntaxReference] inside the [SyntaxSet]
|
||||||
|
/// (We will later add `by_extension`, `by_first_line`, etc.)
|
||||||
|
pub(crate) by_name: HashMap<String, usize>,
|
||||||
|
|
||||||
|
/// Serialized [SyntaxSet]s. Whether or not this data is compressed is
|
||||||
|
/// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES]
|
||||||
|
pub(crate) serialized_syntax_sets: Vec<Vec<u8>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MinimalAssets {
|
||||||
|
pub(crate) fn new(minimal_syntaxes: MinimalSyntaxes) -> Self {
|
||||||
|
// Prepare so we can lazily load minimal syntaxes without a mut reference
|
||||||
|
let deserialized_minimal_syntaxes =
|
||||||
|
vec![LazyCell::new(); minimal_syntaxes.serialized_syntax_sets.len()];
|
||||||
|
|
||||||
|
Self {
|
||||||
|
minimal_syntaxes,
|
||||||
|
deserialized_minimal_syntaxes,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_syntax_set_by_name(&self, name: &str) -> Option<&SyntaxSet> {
|
||||||
|
self.minimal_syntaxes
|
||||||
|
.by_name
|
||||||
|
.get(&name.to_ascii_lowercase())
|
||||||
|
.and_then(|index| self.get_minimal_syntax_set_with_index(*index))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> {
|
||||||
|
let serialized_syntax_set = &self.minimal_syntaxes.serialized_syntax_sets[index];
|
||||||
|
asset_from_contents(
|
||||||
|
&serialized_syntax_set[..],
|
||||||
|
&format!("minimal syntax set {}", index),
|
||||||
|
COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
|
||||||
|
)
|
||||||
|
.map_err(|_| format!("Could not parse minimal syntax set {}", index).into())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> {
|
||||||
|
self.deserialized_minimal_syntaxes
|
||||||
|
.get(index)
|
||||||
|
.and_then(|cell| {
|
||||||
|
cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index))
|
||||||
|
.ok()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
27
src/assets/serialized_syntax_set.rs
Normal file
27
src/assets/serialized_syntax_set.rs
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use syntect::parsing::SyntaxSet;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// A serialized (bincoded and flate2 compressed) SyntaxSet.
/// It stays in this form so that it can be loaded lazily.
#[derive(Debug)]
pub enum SerializedSyntaxSet {
    /// The data lives in a cache file generated by the user.
    FromFile(PathBuf),

    /// The data is embedded into the bat binary.
    FromBinary(&'static [u8]),
}
|
||||||
|
|
||||||
|
impl SerializedSyntaxSet {
|
||||||
|
pub fn deserialize(&self) -> Result<SyntaxSet> {
|
||||||
|
match self {
|
||||||
|
SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)),
|
||||||
|
SerializedSyntaxSet::FromFile(ref path) => {
|
||||||
|
asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -22,9 +22,9 @@
|
||||||
mod macros;
|
mod macros;
|
||||||
|
|
||||||
pub mod assets;
|
pub mod assets;
|
||||||
pub mod assets_metadata;
|
pub mod assets_metadata {
|
||||||
#[cfg(feature = "build-assets")]
|
pub use super::assets::assets_metadata::*;
|
||||||
mod build_assets;
|
}
|
||||||
pub mod config;
|
pub mod config;
|
||||||
pub mod controller;
|
pub mod controller;
|
||||||
mod decorations;
|
mod decorations;
|
||||||
|
|
Loading…
Reference in a new issue