Extract some private submodules from 'bat::assets' (#1850)

This commit is contained in:
Martin Nordholts 2021-09-15 07:59:33 +02:00 committed by GitHub
parent 6226eba52a
commit e84b702309
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 166 additions and 126 deletions

View file

@ -1,7 +1,6 @@
use std::collections::HashMap;
use std::ffi::OsStr; use std::ffi::OsStr;
use std::fs; use std::fs;
use std::path::{Path, PathBuf}; use std::path::Path;
use lazycell::LazyCell; use lazycell::LazyCell;
@ -15,17 +14,26 @@ use crate::error::*;
use crate::input::{InputReader, OpenedInput, OpenedInputKind}; use crate::input::{InputReader, OpenedInput, OpenedInputKind};
use crate::syntax_mapping::{MappingTarget, SyntaxMapping}; use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
use ignored_suffixes::*;
use minimal_assets::*;
use serialized_syntax_set::*;
#[cfg(feature = "build-assets")]
pub use crate::assets::build_assets::*;
pub(crate) mod assets_metadata;
#[cfg(feature = "build-assets")]
mod build_assets;
mod ignored_suffixes;
mod minimal_assets;
mod serialized_syntax_set;
#[derive(Debug)] #[derive(Debug)]
pub struct HighlightingAssets { pub struct HighlightingAssets {
syntax_set_cell: LazyCell<SyntaxSet>, syntax_set_cell: LazyCell<SyntaxSet>,
serialized_syntax_set: SerializedSyntaxSet, serialized_syntax_set: SerializedSyntaxSet,
minimal_syntaxes: MinimalSyntaxes, minimal_assets: MinimalAssets,
/// Lazily load serialized [SyntaxSet]s from [Self.minimal_syntaxes]. The
/// index in this vec matches the index in
/// [Self.minimal_syntaxes.serialized_syntax_sets]
deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>,
theme_set: ThemeSet, theme_set: ThemeSet,
fallback_theme: Option<&'static str>, fallback_theme: Option<&'static str>,
@ -37,22 +45,6 @@ pub struct SyntaxReferenceInSet<'a> {
pub syntax_set: &'a SyntaxSet, pub syntax_set: &'a SyntaxSet,
} }
/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are
/// stored in serialized form, and are deserialized on-demand. This gives good
/// startup performance since only the necessary [SyntaxReference]s needs to be
/// deserialized.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub(crate) struct MinimalSyntaxes {
/// Lookup the index into `serialized_syntax_sets` of a [SyntaxSet] by the
/// name of any [SyntaxReference] inside the [SyntaxSet]
/// (We will later add `by_extension`, `by_first_line`, etc.)
pub(crate) by_name: HashMap<String, usize>,
/// Serialized [SyntaxSet]s. Whether or not this data is compressed is
/// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES]
pub(crate) serialized_syntax_sets: Vec<Vec<u8>>,
}
// Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time // Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time
pub(crate) const COMPRESS_SYNTAXES: bool = true; pub(crate) const COMPRESS_SYNTAXES: bool = true;
@ -70,41 +62,16 @@ pub(crate) const COMPRESS_SERIALIZED_MINIMAL_SYNTAXES: bool = true;
// efficient byte-by-byte copy of `serialized_syntax_sets`. // efficient byte-by-byte copy of `serialized_syntax_sets`.
pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false; pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false;
const IGNORED_SUFFIXES: [&str; 13] = [
// Editor etc backups
"~",
".bak",
".old",
".orig",
// Debian and derivatives apt/dpkg/ucf backups
".dpkg-dist",
".dpkg-old",
".ucf-dist",
".ucf-new",
".ucf-old",
// Red Hat and derivatives rpm backups
".rpmnew",
".rpmorig",
".rpmsave",
// Build system input/template files
".in",
];
impl HighlightingAssets { impl HighlightingAssets {
fn new( fn new(
serialized_syntax_set: SerializedSyntaxSet, serialized_syntax_set: SerializedSyntaxSet,
minimal_syntaxes: MinimalSyntaxes, minimal_syntaxes: MinimalSyntaxes,
theme_set: ThemeSet, theme_set: ThemeSet,
) -> Self { ) -> Self {
// Prepare so we can lazily load minimal syntaxes without a mut reference
let deserialized_minimal_syntaxes =
vec![LazyCell::new(); minimal_syntaxes.serialized_syntax_sets.len()];
HighlightingAssets { HighlightingAssets {
syntax_set_cell: LazyCell::new(), syntax_set_cell: LazyCell::new(),
serialized_syntax_set, serialized_syntax_set,
deserialized_minimal_syntaxes, minimal_assets: MinimalAssets::new(minimal_syntaxes),
minimal_syntaxes,
theme_set, theme_set,
fallback_theme: None, fallback_theme: None,
} }
@ -167,37 +134,12 @@ impl HighlightingAssets {
/// tries to find a minimal [SyntaxSet]. If none is found, returns the /// tries to find a minimal [SyntaxSet]. If none is found, returns the
/// [SyntaxSet] that contains all syntaxes. /// [SyntaxSet] that contains all syntaxes.
fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> { fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> {
let minimal_syntax_set = self match self.minimal_assets.get_syntax_set_by_name(name) {
.minimal_syntaxes
.by_name
.get(&name.to_ascii_lowercase())
.and_then(|index| self.get_minimal_syntax_set_with_index(*index));
match minimal_syntax_set {
Some(syntax_set) => Ok(syntax_set), Some(syntax_set) => Ok(syntax_set),
None => self.get_syntax_set(), None => self.get_syntax_set(),
} }
} }
fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> {
let serialized_syntax_set = &self.minimal_syntaxes.serialized_syntax_sets[index];
asset_from_contents(
&serialized_syntax_set[..],
&format!("minimal syntax set {}", index),
COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
)
.map_err(|_| format!("Could not parse minimal syntax set {}", index).into())
}
fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> {
self.deserialized_minimal_syntaxes
.get(index)
.and_then(|cell| {
cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index))
.ok()
})
}
/// Use [Self::get_syntax_for_file_name] instead /// Use [Self::get_syntax_for_file_name] instead
#[deprecated] #[deprecated]
pub fn syntax_for_file_name( pub fn syntax_for_file_name(
@ -319,7 +261,9 @@ impl HighlightingAssets {
syntax = self.find_syntax_by_file_name_extension(file_name)?; syntax = self.find_syntax_by_file_name_extension(file_name)?;
} }
if syntax.is_none() { if syntax.is_none() {
syntax = self.get_extension_syntax_with_stripped_suffix(file_name)?; syntax = try_with_stripped_suffix(file_name, |stripped_file_name| {
self.get_extension_syntax(stripped_file_name) // Note: recursion
})?;
} }
Ok(syntax) Ok(syntax)
} }
@ -340,25 +284,6 @@ impl HighlightingAssets {
) )
} }
/// If we find an ignored suffix on the file name, e.g. '~', we strip it and
/// then try again to find a syntax without it. Note that we do this recursively.
fn get_extension_syntax_with_stripped_suffix(
&self,
file_name: &OsStr,
) -> Result<Option<SyntaxReferenceInSet>> {
let file_path = Path::new(file_name);
let mut syntax = None;
if let Some(file_str) = file_path.to_str() {
for suffix in &IGNORED_SUFFIXES {
if let Some(stripped_filename) = file_str.strip_suffix(suffix) {
syntax = self.get_extension_syntax(OsStr::new(stripped_filename))?;
break;
}
}
}
Ok(syntax)
}
fn get_first_line_syntax( fn get_first_line_syntax(
&self, &self,
reader: &mut InputReader, reader: &mut InputReader,
@ -371,31 +296,6 @@ impl HighlightingAssets {
} }
} }
#[cfg(feature = "build-assets")]
pub use crate::build_assets::build_assets as build;
/// A SyntaxSet in serialized form, i.e. bincoded and flate2 compressed.
/// We keep it in this format since we want to load it lazily.
#[derive(Debug)]
enum SerializedSyntaxSet {
/// The data comes from a user-generated cache file.
FromFile(PathBuf),
/// The data to use is embedded into the bat binary.
FromBinary(&'static [u8]),
}
impl SerializedSyntaxSet {
fn deserialize(&self) -> Result<SyntaxSet> {
match self {
SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)),
SerializedSyntaxSet::FromFile(ref path) => {
asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES)
}
}
}
}
pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] { pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
include_bytes!("../assets/syntaxes.bin") include_bytes!("../assets/syntaxes.bin")
} }

View file

@ -7,7 +7,6 @@ use syntect::parsing::syntax_definition::{
use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder}; use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder};
use crate::assets::*; use crate::assets::*;
use crate::error::*;
type SyntaxName = String; type SyntaxName = String;
@ -27,7 +26,7 @@ enum Dependency {
ByScope(Scope), ByScope(Scope),
} }
pub fn build_assets( pub fn build(
source_dir: &Path, source_dir: &Path,
include_integrated_assets: bool, include_integrated_assets: bool,
target_dir: &Path, target_dir: &Path,

View file

@ -0,0 +1,42 @@
use std::ffi::OsStr;
use std::path::Path;
use crate::error::*;
/// File name suffixes that [`try_with_stripped_suffix`] strips before retrying
/// a lookup. They are tried in the order listed here, and the first one that
/// matches wins.
const IGNORED_SUFFIXES: [&str; 13] = [
    // Editor etc backups
    "~",
    ".bak",
    ".old",
    ".orig",
    // Debian and derivatives apt/dpkg/ucf backups
    ".dpkg-dist",
    ".dpkg-old",
    ".ucf-dist",
    ".ucf-new",
    ".ucf-old",
    // Red Hat and derivatives rpm backups
    ".rpmnew",
    ".rpmorig",
    ".rpmsave",
    // Build system input/template files
    ".in",
];

/// If we find an ignored suffix on the file name, e.g. '~', we strip it and
/// then try again without it.
///
/// Only the first matching entry of `IGNORED_SUFFIXES` is stripped, and `func`
/// is called at most once with the stripped name. A caller that wants to peel
/// off several stacked suffixes (e.g. `foo.rs.orig~`) has to call back into
/// this function from within `func`.
///
/// Returns `Ok(None)` without calling `func` when `file_name` is not valid
/// UTF-8 or does not end in any ignored suffix. Otherwise returns whatever
/// `func` returns, including its error.
pub fn try_with_stripped_suffix<T, F>(file_name: &OsStr, func: F) -> Result<Option<T>>
where
    // `FnOnce` is sufficient because the callback runs at most once; every
    // `Fn` closure that callers already pass still satisfies this bound.
    F: FnOnce(&OsStr) -> Result<Option<T>>,
{
    let stripped_file_name = Path::new(file_name).to_str().and_then(|file_str| {
        IGNORED_SUFFIXES
            .iter()
            .find_map(|suffix| file_str.strip_suffix(*suffix))
    });

    match stripped_file_name {
        Some(stripped) => func(OsStr::new(stripped)),
        None => Ok(None),
    }
}

View file

@ -0,0 +1,72 @@
use std::collections::HashMap;
use lazycell::LazyCell;
use syntect::parsing::SyntaxSet;
use super::*;
/// Holds the serialized minimal [SyntaxSet]s together with a lazily filled
/// cache of their deserialized form.
#[derive(Debug)]
pub(crate) struct MinimalAssets {
/// The serialized minimal syntax sets and the name lookup table for them.
minimal_syntaxes: MinimalSyntaxes,
/// One cell per entry of `minimal_syntaxes.serialized_syntax_sets`, filled
/// with the deserialized [SyntaxSet] the first time it is requested. The
/// index in this vec matches the index in
/// `minimal_syntaxes.serialized_syntax_sets`.
deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>,
}
/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are
/// stored in serialized form, and are deserialized on-demand. This gives good
/// startup performance since only the necessary [SyntaxReference]s need to be
/// deserialized.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub(crate) struct MinimalSyntaxes {
/// Lookup the index into `serialized_syntax_sets` of a [SyntaxSet] by the
/// name of any [SyntaxReference] inside the [SyntaxSet]
/// (We will later add `by_extension`, `by_first_line`, etc.)
///
/// NOTE(review): lookups ASCII-lowercase the requested name before
/// consulting this map, so keys are presumably stored lowercased -- confirm
/// against the code that builds the map.
pub(crate) by_name: HashMap<String, usize>,
/// Serialized [SyntaxSet]s. Whether or not this data is compressed is
/// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES]
pub(crate) serialized_syntax_sets: Vec<Vec<u8>>,
}
impl MinimalAssets {
    /// Wraps `minimal_syntaxes` and allocates one empty cache cell per
    /// serialized syntax set, so that the sets can later be deserialized
    /// lazily through a shared (`&self`) reference.
    pub(crate) fn new(minimal_syntaxes: MinimalSyntaxes) -> Self {
        let cell_count = minimal_syntaxes.serialized_syntax_sets.len();
        Self {
            minimal_syntaxes,
            deserialized_minimal_syntaxes: vec![LazyCell::new(); cell_count],
        }
    }

    /// Returns the minimal [SyntaxSet] registered under `name`, deserializing
    /// it on first use. The name is ASCII-lowercased before the lookup.
    ///
    /// Returns `None` if no minimal syntax set is registered for the name or
    /// if it could not be loaded.
    pub fn get_syntax_set_by_name(&self, name: &str) -> Option<&SyntaxSet> {
        let key = name.to_ascii_lowercase();
        let index = *self.minimal_syntaxes.by_name.get(&key)?;
        self.get_minimal_syntax_set_with_index(index)
    }

    /// Deserializes the minimal syntax set stored at `index`.
    ///
    /// Indexes directly into the serialized sets, so `index` must be in
    /// bounds. Any underlying failure is replaced by a generic
    /// "Could not parse" error that names the index.
    fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> {
        let serialized = &self.minimal_syntaxes.serialized_syntax_sets[index];
        let description = format!("minimal syntax set {}", index);
        let loaded = asset_from_contents(
            serialized.as_slice(),
            &description,
            COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
        );
        loaded.map_err(|_| format!("Could not parse minimal syntax set {}", index).into())
    }

    /// Returns the cached minimal syntax set at `index`, loading it into its
    /// cell if that has not happened yet.
    ///
    /// An out-of-range index and a failed load both yield `None`; the load
    /// error itself is discarded.
    fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> {
        let cell = self.deserialized_minimal_syntaxes.get(index)?;
        cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index))
            .ok()
    }
}

View file

@ -0,0 +1,27 @@
use std::path::PathBuf;
use syntect::parsing::SyntaxSet;
use super::*;
/// A SyntaxSet in serialized form, i.e. bincoded and flate2 compressed.
/// We keep it in this format since we want to load it lazily.
///
/// The variant records where the serialized bytes live; nothing is read or
/// decoded until `deserialize` is called.
#[derive(Debug)]
pub enum SerializedSyntaxSet {
/// The data comes from a user-generated cache file.
/// The payload is the path of that file.
FromFile(PathBuf),
/// The data to use is embedded into the bat binary.
/// The payload is the embedded byte slice.
FromBinary(&'static [u8]),
}
impl SerializedSyntaxSet {
    /// Turns the serialized data into a usable [SyntaxSet].
    ///
    /// Data backed by a cache file is loaded via `asset_from_cache`, and that
    /// call's result is returned as-is. Embedded data is decoded with
    /// `from_binary` and always wrapped in `Ok`. Both paths pass
    /// `COMPRESS_SYNTAXES` as the compression flag.
    pub fn deserialize(&self) -> Result<SyntaxSet> {
        match self {
            Self::FromFile(path) => asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES),
            Self::FromBinary(data) => {
                let syntax_set = from_binary(data, COMPRESS_SYNTAXES);
                Ok(syntax_set)
            }
        }
    }
}

View file

@ -22,9 +22,9 @@
mod macros; mod macros;
pub mod assets; pub mod assets;
pub mod assets_metadata; pub mod assets_metadata {
#[cfg(feature = "build-assets")] pub use super::assets::assets_metadata::*;
mod build_assets; }
pub mod config; pub mod config;
pub mod controller; pub mod controller;
mod decorations; mod decorations;