mirror of
https://github.com/getzola/zola
synced 2024-12-05 01:49:12 +00:00
Allow ignoring files when link checking (#2264)
* Allow ignoring files when link checking
* cargo fmt
* Fix tests
* Remove mystery duplicate function..?
* Add in some mysterious missing code..?
* Simple tests for link checker file globs in config
* cargo fmt
* Remove comment
* convert expect to error propagation
* Address comments
* cargo fmt
This commit is contained in:
parent
bdb18657b6
commit
7d18ddfde2
5 changed files with 123 additions and 48 deletions
|
@ -1,5 +1,9 @@
|
|||
use libs::globset::GlobSet;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use errors::Result;
|
||||
use utils::globs::build_ignore_glob_set;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum LinkCheckerLevel {
|
||||
#[serde(rename = "error")]
|
||||
|
@ -14,7 +18,7 @@ impl Default for LinkCheckerLevel {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct LinkChecker {
|
||||
/// Skip link checking for these URL prefixes
|
||||
|
@ -25,4 +29,16 @@ pub struct LinkChecker {
|
|||
pub internal_level: LinkCheckerLevel,
|
||||
/// Emit either "error" or "warn" for broken external links (including anchor links).
|
||||
pub external_level: LinkCheckerLevel,
|
||||
/// A list of file glob patterns to skip link checking on
|
||||
pub ignored_files: Vec<String>,
|
||||
#[serde(skip_serializing, skip_deserializing)] // not a typo, 2 are needed
|
||||
pub ignored_files_globset: Option<GlobSet>,
|
||||
}
|
||||
|
||||
impl LinkChecker {
|
||||
pub fn resolve_globset(&mut self) -> Result<()> {
|
||||
let glob_set = build_ignore_glob_set(&self.ignored_files, "files")?;
|
||||
self.ignored_files_globset = Some(glob_set);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,13 +8,14 @@ pub mod taxonomies;
|
|||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use libs::globset::{Glob, GlobSet, GlobSetBuilder};
|
||||
use libs::globset::GlobSet;
|
||||
use libs::toml::Value as Toml;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::theme::Theme;
|
||||
use errors::{anyhow, bail, Result};
|
||||
use utils::fs::read_file;
|
||||
use utils::globs::build_ignore_glob_set;
|
||||
use utils::slugs::slugify_paths;
|
||||
|
||||
// We want a default base url for tests
|
||||
|
@ -28,18 +29,6 @@ pub enum Mode {
|
|||
Check,
|
||||
}
|
||||
|
||||
fn build_ignore_glob_set(ignore: &Vec<String>, name: &str) -> Result<GlobSet> {
|
||||
let mut glob_set_builder = GlobSetBuilder::new();
|
||||
for pat in ignore {
|
||||
let glob = match Glob::new(pat) {
|
||||
Ok(g) => g,
|
||||
Err(e) => bail!("Invalid ignored_{} glob pattern: {}, error = {}", name, pat, e),
|
||||
};
|
||||
glob_set_builder.add(glob);
|
||||
}
|
||||
Ok(glob_set_builder.build().unwrap_or_else(|_| panic!("Bad ignored_{} in config file.", name)))
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct Config {
|
||||
|
@ -150,21 +139,13 @@ impl Config {
|
|||
|
||||
config.add_default_language()?;
|
||||
config.slugify_taxonomies();
|
||||
config.link_checker.resolve_globset()?;
|
||||
|
||||
if !config.ignored_content.is_empty() {
|
||||
// Convert the file glob strings into a compiled glob set matcher. We want to do this once,
|
||||
// at program initialization, rather than for every page, for example. We arrange for the
|
||||
// globset matcher to always exist (even though it has to be an inside an Option at the
|
||||
// moment because of the TOML serializer); if the glob set is empty the `is_match` function
|
||||
// of the globber always returns false.
|
||||
let glob_set = build_ignore_glob_set(&config.ignored_content, "content")?;
|
||||
config.ignored_content_globset = Some(glob_set);
|
||||
}
|
||||
let content_glob_set = build_ignore_glob_set(&config.ignored_content, "content")?;
|
||||
config.ignored_content_globset = Some(content_glob_set);
|
||||
|
||||
if !config.ignored_static.is_empty() {
|
||||
let glob_set = build_ignore_glob_set(&config.ignored_static, "static")?;
|
||||
config.ignored_static_globset = Some(glob_set);
|
||||
}
|
||||
let static_glob_set = build_ignore_glob_set(&config.ignored_static, "static")?;
|
||||
config.ignored_static_globset = Some(static_glob_set);
|
||||
|
||||
Ok(config)
|
||||
}
|
||||
|
@ -652,32 +633,18 @@ title = "A title"
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn missing_ignored_content_results_in_empty_vector_and_empty_globset() {
|
||||
fn missing_ignored_content_results_in_empty_vector() {
|
||||
let config_str = r#"
|
||||
title = "My site"
|
||||
base_url = "example.com"
|
||||
"#;
|
||||
|
||||
let config = Config::parse(config_str).unwrap();
|
||||
let v = config.ignored_content;
|
||||
assert_eq!(v.len(), 0);
|
||||
assert!(config.ignored_content_globset.is_none());
|
||||
assert_eq!(config.ignored_content.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn missing_ignored_static_results_in_empty_vector_and_empty_globset() {
|
||||
let config_str = r#"
|
||||
title = "My site"
|
||||
base_url = "example.com"
|
||||
"#;
|
||||
let config = Config::parse(config_str).unwrap();
|
||||
let v = config.ignored_static;
|
||||
assert_eq!(v.len(), 0);
|
||||
assert!(config.ignored_static_globset.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_ignored_content_results_in_empty_vector_and_empty_globset() {
|
||||
fn empty_ignored_content_results_in_empty_vector() {
|
||||
let config_str = r#"
|
||||
title = "My site"
|
||||
base_url = "example.com"
|
||||
|
@ -686,11 +653,21 @@ ignored_content = []
|
|||
|
||||
let config = Config::parse(config_str).unwrap();
|
||||
assert_eq!(config.ignored_content.len(), 0);
|
||||
assert!(config.ignored_content_globset.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_ignored_static_results_in_empty_vector_and_empty_globset() {
|
||||
fn missing_ignored_static_results_in_empty_vector() {
|
||||
let config_str = r#"
|
||||
title = "My site"
|
||||
base_url = "example.com"
|
||||
"#;
|
||||
|
||||
let config = Config::parse(config_str).unwrap();
|
||||
assert_eq!(config.ignored_static.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_ignored_static_results_in_empty_vector() {
|
||||
let config_str = r#"
|
||||
title = "My site"
|
||||
base_url = "example.com"
|
||||
|
@ -699,7 +676,30 @@ ignored_static = []
|
|||
|
||||
let config = Config::parse(config_str).unwrap();
|
||||
assert_eq!(config.ignored_static.len(), 0);
|
||||
assert!(config.ignored_static_globset.is_none());
|
||||
}
|
||||
|
||||
#[test]
fn missing_link_checker_ignored_files_results_in_empty_vector() {
    // The config has no `[link_checker]` table at all.
    let parsed = Config::parse(
        r#"
title = "My site"
base_url = "example.com"
"#,
    )
    .unwrap();

    let ignored = &parsed.link_checker.ignored_files;
    assert_eq!(ignored.len(), 0);
}
|
||||
|
||||
#[test]
fn empty_link_checker_ignored_files_results_in_empty_vector() {
    // `ignored_files` is present but explicitly empty.
    let parsed = Config::parse(
        r#"
title = "My site"
base_url = "example.com"
[link_checker]
ignored_files = []
"#,
    )
    .unwrap();

    let ignored = &parsed.link_checker.ignored_files;
    assert_eq!(ignored.len(), 0);
}
|
||||
|
||||
#[test]
|
||||
|
@ -760,6 +760,36 @@ ignored_static = ["*.{graphml,iso}", "*.py?", "**/{target,temp_folder}"]
|
|||
assert!(g.is_match("content/poetry/zen.py2"));
|
||||
}
|
||||
|
||||
#[test]
fn non_empty_link_checker_ignored_pages_results_in_vector_of_patterns_and_configured_globset() {
    let config_str = r#"
title = "My site"
base_url = "example.com"
[link_checker]
ignored_files = ["*.{graphml,iso}", "*.py?", "**/{target,temp_folder}"]
"#;

    let link_checker = Config::parse(config_str).unwrap().link_checker;

    // The raw patterns are kept exactly as written in the config.
    let expected_patterns = vec!["*.{graphml,iso}", "*.py?", "**/{target,temp_folder}"];
    assert_eq!(link_checker.ignored_files, expected_patterns);

    // The compiled matcher holds one glob per configured pattern.
    let globset = link_checker.ignored_files_globset.unwrap();
    assert_eq!(globset.len(), 3);

    // (path, whether the glob set is expected to match it), checked in order.
    let cases = [
        ("foo.graphml", true),
        ("foo/bar/foo.graphml", true),
        ("foo.iso", true),
        ("foo.png", false),
        ("foo.py2", true),
        ("foo.py3", true),
        ("foo.py", false),
        ("foo/bar/target", true),
        ("foo/bar/baz/temp_folder", true),
        ("foo/bar/baz/temp_folder/target", true),
        ("temp_folder", true),
        ("my/isos/foo.iso", true),
        ("content/poetry/zen.py2", true),
    ];
    for &(path, should_match) in cases.iter() {
        assert!(globset.is_match(path) == should_match);
    }
}
|
||||
|
||||
#[test]
|
||||
fn link_checker_skip_anchor_prefixes() {
|
||||
let config_str = r#"
|
||||
|
|
|
@ -3,6 +3,7 @@ use std::path::{Path, PathBuf};
|
|||
use std::{cmp, collections::HashMap, collections::HashSet, iter::FromIterator, thread};
|
||||
|
||||
use config::LinkCheckerLevel;
|
||||
use libs::globset::GlobSet;
|
||||
use libs::rayon::prelude::*;
|
||||
|
||||
use crate::Site;
|
||||
|
@ -105,6 +106,10 @@ fn should_skip_by_prefix(link: &str, skip_prefixes: &[String]) -> bool {
|
|||
skip_prefixes.iter().any(|prefix| link.starts_with(prefix))
|
||||
}
|
||||
|
||||
fn should_skip_by_file(file_path: &Path, glob_set: &GlobSet) -> bool {
|
||||
glob_set.is_match(file_path)
|
||||
}
|
||||
|
||||
fn get_link_domain(link: &str) -> Result<String> {
|
||||
return match Url::parse(link) {
|
||||
Ok(url) => match url.host_str().map(String::from) {
|
||||
|
@ -150,9 +155,12 @@ pub fn check_external_links(site: &Site) -> Vec<String> {
|
|||
let mut invalid_url_links: u32 = 0;
|
||||
// First we look at all the external links, skip those the user wants to skip and record
|
||||
// the ones that have invalid URLs
|
||||
let ignored_files_globset = site.config.link_checker.ignored_files_globset.as_ref().unwrap();
|
||||
for (file_path, links) in external_links {
|
||||
for link in links {
|
||||
if should_skip_by_prefix(link, &site.config.link_checker.skip_prefixes) {
|
||||
if should_skip_by_prefix(link, &site.config.link_checker.skip_prefixes)
|
||||
|| should_skip_by_file(file_path, ignored_files_globset)
|
||||
{
|
||||
skipped_link_count += 1;
|
||||
} else {
|
||||
match get_link_domain(link) {
|
||||
|
|
20
components/utils/src/globs.rs
Normal file
20
components/utils/src/globs.rs
Normal file
|
@ -0,0 +1,20 @@
|
|||
use libs::globset::{Glob, GlobSet, GlobSetBuilder};
|
||||
|
||||
use errors::{bail, Result};
|
||||
|
||||
pub fn build_ignore_glob_set(ignore: &Vec<String>, name: &str) -> Result<GlobSet> {
|
||||
// Convert the file glob strings into a compiled glob set matcher. We want to do this once,
|
||||
// at program initialization, rather than for every page, for example. We arrange for the
|
||||
// globset matcher to always exist (even though it has to be inside an Option at the
|
||||
// moment because of the TOML serializer); if the glob set is empty the `is_match` function
|
||||
// of the globber always returns false.
|
||||
let mut glob_set_builder = GlobSetBuilder::new();
|
||||
for pat in ignore {
|
||||
let glob = match Glob::new(pat) {
|
||||
Ok(g) => g,
|
||||
Err(e) => bail!("Invalid ignored_{} glob pattern: {}, error = {}", name, pat, e),
|
||||
};
|
||||
glob_set_builder.add(glob);
|
||||
}
|
||||
Ok(glob_set_builder.build()?)
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
pub mod anchors;
|
||||
pub mod de;
|
||||
pub mod fs;
|
||||
pub mod globs;
|
||||
pub mod net;
|
||||
pub mod site;
|
||||
pub mod slugs;
|
||||
|
|
Loading…
Reference in a new issue