Update to elasticlunr-rs 3.0.0 (#1873)

This commit is contained in:
Matt Ickstadt 2022-06-03 16:25:42 -05:00 committed by GitHub
parent ed296d72a1
commit e9e6cadc6c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 187 additions and 185 deletions

285
Cargo.lock generated
View file

@ -47,15 +47,6 @@ dependencies = [
"url",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "any_ascii"
version = "0.1.7"
@ -179,6 +170,12 @@ dependencies = [
"serde",
]
[[package]]
name = "build_const"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7"
[[package]]
name = "bumpalo"
version = "3.9.1"
@ -296,19 +293,10 @@ dependencies = [
]
[[package]]
name = "clap"
version = "2.34.0"
name = "chunked_transfer"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim 0.8.0",
"textwrap 0.11.0",
"unicode-width",
"vec_map",
]
checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e"
[[package]]
name = "clap"
@ -322,9 +310,9 @@ dependencies = [
"clap_lex",
"indexmap",
"lazy_static",
"strsim 0.10.0",
"strsim",
"termcolor",
"textwrap 0.15.0",
"textwrap",
]
[[package]]
@ -333,7 +321,7 @@ version = "3.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da92e6facd8d73c22745a5d3cbb59bdf8e46e3235c923e516527d8e81eec14a4"
dependencies = [
"clap 3.1.17",
"clap",
]
[[package]]
@ -342,7 +330,7 @@ version = "3.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3aab4734e083b809aaf5794e14e756d1c798d2c69c7f7de7a09a2f5214993c1"
dependencies = [
"heck 0.4.0",
"heck",
"proc-macro-error",
"proc-macro2",
"quote",
@ -446,6 +434,15 @@ dependencies = [
"libc",
]
[[package]]
name = "crc"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb"
dependencies = [
"build_const",
]
[[package]]
name = "crc32fast"
version = "1.3.2"
@ -590,12 +587,11 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "elasticlunr-rs"
version = "2.3.14"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60eee99ae400fb1c4521ea3bd678994cb66572754d240449368e8ecd40281569"
checksum = "e6dae5cac90640734ee881bc5f21b6e5123f4e5235e52428db114abffc2391d6"
dependencies = [
"jieba-rs",
"lazy_static",
"lindera",
"lindera-core",
"regex",
@ -603,8 +599,6 @@ dependencies = [
"serde",
"serde_derive",
"serde_json",
"strum",
"strum_macros",
]
[[package]]
@ -686,6 +680,19 @@ dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "env_logger"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3"
dependencies = [
"atty",
"humantime",
"log",
"regex",
"termcolor",
]
[[package]]
name = "errors"
version = "0.1.0"
@ -1018,15 +1025,6 @@ version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
[[package]]
name = "heck"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "heck"
version = "0.4.0"
@ -1096,6 +1094,12 @@ version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02296996cb8796d7c6e3bc2d9211b7802812d36999a51bb754123ead7d37d026"
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "hyper"
version = "0.14.18"
@ -1450,42 +1454,78 @@ dependencies = [
[[package]]
name = "lindera"
version = "0.8.1"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e067b79992ab4ee575f5113ca7ccc1b011f67378f7627169e9bf95d48a8d481"
checksum = "7d1c5db4b1d12637aa316dc1adb215f78fe79025080af750942516c5ff17d1a0"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"encoding",
"lindera-cc-cedict-builder",
"lindera-core",
"lindera-dictionary",
"lindera-ipadic",
"lindera-ipadic-builder",
"lindera-ko-dic-builder",
"lindera-unidic-builder",
"serde",
"serde_json",
"thiserror",
]
[[package]]
name = "lindera-cc-cedict-builder"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73a3509fb497340571d49feddb57e1db2ce5248c4d449f2548d0ee8cb745eb1e"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"clap",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-core",
"lindera-decompress",
"log",
"yada",
]
[[package]]
name = "lindera-core"
version = "0.8.1"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09d34134111feb8c9424de5743a9ead4c22cb1c5a48cb90322ebbe21a2bc27c1"
checksum = "5d20d1b2c085393aed58625d741beca69410e1143fc35bc67ebc35c9885f9f74"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"encoding",
"log",
"serde",
"thiserror",
"yada",
]
[[package]]
name = "lindera-dictionary"
version = "0.8.1"
name = "lindera-decompress"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68ac4ac60f3ca650e4ab1280a5b6d57f73267902477ab9c9fd3b6609a7fb5888"
checksum = "b96b8050cded13927a99bcb8cbb0987f89fc8f35429fc153b4bc05ddc7a53a44"
dependencies = [
"anyhow",
"lzma-rs",
"serde",
]
[[package]]
name = "lindera-dictionary"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5abe3dddc22303402957edb4472ab0c996e0d93b3b00643de3bee8b28c2f9297"
dependencies = [
"anyhow",
"bincode",
@ -1495,9 +1535,9 @@ dependencies = [
[[package]]
name = "lindera-ipadic"
version = "0.8.1"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "266fda136179e607d6ebcf2ef326fbdb2a133f9bdea9a68e6ac4fa8627e47ced"
checksum = "b8f4c111f6ad9eb9e015d02061af2ed36fc0255f29359294415c7c2f1ea5b5b6"
dependencies = [
"bincode",
"byteorder",
@ -1505,28 +1545,71 @@ dependencies = [
"flate2",
"lindera-core",
"lindera-ipadic-builder",
"reqwest",
"once_cell",
"tar",
"tokio",
"ureq",
]
[[package]]
name = "lindera-ipadic-builder"
version = "0.8.1"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ede56e474b8fda9d4df2b9dc7683018111d3298260e1f594655e34287f26c64"
checksum = "a2b9893f22a4a7511ac70ff7d96cda9b8d7259b7d7121784183c73bc593ce6e7"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"clap 2.34.0",
"clap",
"encoding",
"env_logger",
"glob",
"lindera-core",
"lindera-decompress",
"log",
"serde",
"yada",
]
[[package]]
name = "lindera-ko-dic-builder"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14282600ebfe7ab6fd4f3042143024ff9d74c09d58fd983d0c587839cf940d4a"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"clap",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-core",
"lindera-decompress",
"log",
"yada",
]
[[package]]
name = "lindera-unidic-builder"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b20825d46c95854e47c532c3e548dfec07c8f187c1ed89383cb6c35790338088"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"clap",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-core",
"lindera-decompress",
"log",
"yada",
]
[[package]]
name = "line-wrap"
version = "0.1.1"
@ -1572,6 +1655,16 @@ dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "lzma-rs"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aba8ecb0450dfabce4ad72085eed0a75dffe8f21f7ada05638564ea9db2d7fb1"
dependencies = [
"byteorder",
"crc",
]
[[package]]
name = "mac"
version = "0.1.1"
@ -2796,15 +2889,6 @@ dependencies = [
"digest 0.10.3",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
dependencies = [
"libc",
]
[[package]]
name = "similar"
version = "2.1.0"
@ -2914,36 +2998,12 @@ dependencies = [
"quote",
]
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strum"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aaf86bbcfd1fa9670b7a129f64fc0c9fcbbfe4f1bc4210e9e98fe71ffc12cde2"
[[package]]
name = "strum_macros"
version = "0.21.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d06aaeeee809dbc59eb4556183dd927df67db1540de5be8d3ec0b6636358a5ec"
dependencies = [
"heck 0.3.3",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "svg_metadata"
version = "0.4.2"
@ -3096,15 +3156,6 @@ dependencies = [
"syn",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "textwrap"
version = "0.15.0"
@ -3211,25 +3262,11 @@ dependencies = [
"mio 0.8.2",
"num_cpus",
"once_cell",
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2",
"tokio-macros",
"winapi 0.3.9",
]
[[package]]
name = "tokio-macros"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tokio-native-tls"
version = "0.3.0"
@ -3437,12 +3474,6 @@ version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99"
[[package]]
name = "unicode-width"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
[[package]]
name = "unicode-xid"
version = "0.2.3"
@ -3455,6 +3486,22 @@ version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
[[package]]
name = "ureq"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9399fa2f927a3d327187cbd201480cee55bee6ac5d3c77dd27f0c6814cff16d5"
dependencies = [
"base64",
"chunked_transfer",
"log",
"once_cell",
"rustls",
"url",
"webpki",
"webpki-roots",
]
[[package]]
name = "url"
version = "2.2.2"
@ -3489,12 +3536,6 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]]
name = "version_check"
version = "0.9.4"
@ -3823,7 +3864,7 @@ dependencies = [
name = "zola"
version = "0.16.0"
dependencies = [
"clap 3.1.17",
"clap",
"clap_complete",
"console 0.1.0",
"ctrlc",

View file

@ -9,7 +9,7 @@ ammonia = "3"
atty = "0.2.11"
base64 = "0.13"
csv = "1"
elasticlunr-rs = {version = "2", default-features = false, features = ["da", "no", "de", "du", "es", "fi", "fr", "it", "pt", "ro", "ru", "sv", "tr"] }
elasticlunr-rs = { version = "3.0.0", features = ["da", "no", "de", "du", "es", "fi", "fr", "it", "pt", "ro", "ru", "sv", "tr"] }
filetime = "0.2"
gh-emoji = "1"
glob = "0.3"

View file

@ -1,9 +1,7 @@
use std::collections::{HashMap, HashSet};
use libs::ammonia;
use libs::elasticlunr::pipeline;
use libs::elasticlunr::pipeline::TokenizerFn;
use libs::elasticlunr::{Index, Language};
use libs::elasticlunr::{lang, Index, IndexBuilder};
use libs::once_cell::sync::Lazy;
use config::{Config, Search};
@ -27,25 +25,24 @@ static AMMONIA: Lazy<ammonia::Builder<'static>> = Lazy::new(|| {
builder
});
fn build_fields(search_config: &Search) -> Vec<String> {
let mut fields = vec![];
fn build_fields(search_config: &Search, mut index: IndexBuilder) -> IndexBuilder {
if search_config.include_title {
fields.push("title".to_owned());
index = index.add_field("title");
}
if search_config.include_description {
fields.push("description".to_owned());
index = index.add_field("description");
}
if search_config.include_path {
fields.push("path".to_owned());
index = index.add_field_with_tokenizer("path", Box::new(path_tokenizer));
}
if search_config.include_content {
fields.push("body".to_owned());
index = index.add_field("body")
}
fields
index
}
fn path_tokenizer(text: &str) -> Vec<String> {
@ -55,34 +52,6 @@ fn path_tokenizer(text: &str) -> Vec<String> {
.collect()
}
fn build_tokenizers(search_config: &Search, language: Language) -> Vec<TokenizerFn> {
let text_tokenizer = match language {
#[cfg(feature = "indexing-zh")]
Language::Chinese => pipeline::tokenize_chinese,
#[cfg(feature = "indexing-ja")]
Language::Japanese => pipeline::tokenize_japanese,
_ => pipeline::tokenize,
};
let mut tokenizers: Vec<TokenizerFn> = vec![];
if search_config.include_title {
tokenizers.push(text_tokenizer);
}
if search_config.include_description {
tokenizers.push(text_tokenizer);
}
if search_config.include_path {
tokenizers.push(path_tokenizer);
}
if search_config.include_content {
tokenizers.push(text_tokenizer);
}
tokenizers
}
fn fill_index(
search_config: &Search,
title: &Option<String>,
@ -126,26 +95,20 @@ fn fill_index(
/// Errors if the language given is not available in Elasticlunr
/// TODO: is making `in_search_index` apply to subsections of a `false` section useful?
pub fn build_index(lang: &str, library: &Library, config: &Config) -> Result<String> {
let language = match Language::from_code(lang) {
let language = match lang::from_code(lang) {
Some(l) => l,
None => {
bail!("Tried to build search index for language {} which is not supported", lang);
}
};
let language_options = &config.languages[lang];
let mut index = Index::with_language(language, &build_fields(&language_options.search));
let tokenizers = build_tokenizers(&language_options.search, language);
let mut index = IndexBuilder::with_language(language);
index = build_fields(&language_options.search, index);
let mut index = index.build();
for (_, section) in &library.sections {
if section.lang == lang {
add_section_to_index(
&mut index,
section,
library,
&language_options.search,
tokenizers.clone(),
);
add_section_to_index(&mut index, section, library, &language_options.search);
}
}
@ -157,7 +120,6 @@ fn add_section_to_index(
section: &Section,
library: &Library,
search_config: &Search,
tokenizers: Vec<TokenizerFn>,
) {
if !section.meta.in_search_index {
return;
@ -165,7 +127,7 @@ fn add_section_to_index(
// Don't index redirecting sections
if section.meta.redirect_to.is_none() {
index.add_doc_with_tokenizers(
index.add_doc(
&section.permalink,
&fill_index(
search_config,
@ -174,7 +136,6 @@ fn add_section_to_index(
&section.path,
&section.content,
),
tokenizers.clone(),
);
}
@ -184,7 +145,7 @@ fn add_section_to_index(
continue;
}
index.add_doc_with_tokenizers(
index.add_doc(
&page.permalink,
&fill_index(
search_config,
@ -193,7 +154,6 @@ fn add_section_to_index(
&page.path,
&page.content,
),
tokenizers.clone(),
);
}
}
@ -207,21 +167,21 @@ mod tests {
#[test]
fn can_build_fields() {
let mut config = Config::default();
let fields = build_fields(&config.search);
assert_eq!(fields, vec!["title", "body"]);
let index = build_fields(&config.search, IndexBuilder::new()).build();
assert_eq!(index.get_fields(), vec!["title", "body"]);
config.search.include_content = false;
config.search.include_description = true;
let fields = build_fields(&config.search);
assert_eq!(fields, vec!["title", "description"]);
let index = build_fields(&config.search, IndexBuilder::new()).build();
assert_eq!(index.get_fields(), vec!["title", "description"]);
config.search.include_content = true;
let fields = build_fields(&config.search);
assert_eq!(fields, vec!["title", "description", "body"]);
let index = build_fields(&config.search, IndexBuilder::new()).build();
assert_eq!(index.get_fields(), vec!["title", "description", "body"]);
config.search.include_title = false;
let fields = build_fields(&config.search);
assert_eq!(fields, vec!["description", "body"]);
let index = build_fields(&config.search, IndexBuilder::new()).build();
assert_eq!(index.get_fields(), vec!["description", "body"]);
}
#[test]

1
themes/abridge Submodule

@ -0,0 +1 @@
Subproject commit 6bdc489863018b2cb97541ac019a49aa1364537b