Add bibtex support to load_data() (#1190)

* Add support for loading Bibtex data.

* Add load_data() documentation for the bibtex format

* Force bibtex tags to be lower case.

Bibtex tags are case-insensitive, and this works around Tera's case sensitivity.

* Improve the load_data() documentation for the bibtex format
This commit is contained in:
Renato Caldas 2020-11-21 10:44:42 +00:00 committed by Vincent Prouillet
parent 92282608fe
commit 2673466998
4 changed files with 217 additions and 4 deletions

111
Cargo.lock generated
View file

@ -51,6 +51,12 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "arrayvec"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"
[[package]]
name = "assert-json-diff"
version = "1.1.0"
@ -161,6 +167,18 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
[[package]]
name = "bytecount"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f861d9ce359f56dbcb6e0c2a1cb84e52ad732cadb57b806adeb3c7668caccbd8"
[[package]]
name = "bytecount"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0017894339f586ccb943b01b9555de56770c11cda818e7e3d8bd93f4ed7f46e"
[[package]]
name = "bytemuck"
version = "1.4.1"
@ -1158,6 +1176,19 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73a004f877f468548d8d0ac4977456a249d8fabbdb8416c36db163dfc8f2e8ca"
[[package]]
name = "lexical-core"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db65c6da02e61f55dae90a0ae427b2a5f6b3e8db09f58d10efab23af92592616"
dependencies = [
"arrayvec",
"bitflags",
"cfg-if",
"ryu",
"static_assertions",
]
[[package]]
name = "libc"
version = "0.2.80"
@ -1483,6 +1514,73 @@ dependencies = [
"libc",
]
[[package]]
name = "nom"
version = "5.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
dependencies = [
"lexical-core",
"memchr",
"version_check",
]
[[package]]
name = "nom-bibtex"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9db257f6c7b9c8b3ab67ee6a4b23a290c157d183fef2ac065bf9fce5f1c1299"
dependencies = [
"nom",
"nom-tracable",
"nom_locate 2.0.0",
"quick-error",
]
[[package]]
name = "nom-tracable"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e012c742e1269f801f6bfe0d1ebf99d7a3f7bc1d65c970bab0e7bee439e31610"
dependencies = [
"nom",
"nom-tracable-macros",
"nom_locate 1.0.0",
"nom_locate 2.0.0",
]
[[package]]
name = "nom-tracable-macros"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65ad630ff46d4c61da89042f327e6fdf104a6ebb667565727ef0bb294a7c3197"
dependencies = [
"quote",
"syn",
]
[[package]]
name = "nom_locate"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f932834fd8e391fc7710e2ba17e8f9f8645d846b55aa63207e17e110a1e1ce35"
dependencies = [
"bytecount 0.3.2",
"memchr",
"nom",
]
[[package]]
name = "nom_locate"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e4726500a3d0297dd38edc169d919ad997a9931b4645b59ce0231e88536e213"
dependencies = [
"bytecount 0.6.0",
"memchr",
"nom",
]
[[package]]
name = "notify"
version = "4.0.15"
@ -1849,6 +1947,12 @@ dependencies = [
"unicase",
]
[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quote"
version = "1.0.7"
@ -2317,6 +2421,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "string_cache"
version = "0.8.0"
@ -2438,6 +2548,7 @@ dependencies = [
"lazy_static",
"library",
"mockito",
"nom-bibtex",
"pulldown-cmark",
"reqwest",
"serde_json",

View file

@ -15,6 +15,7 @@ image = "0.23"
serde_json = "1.0"
sha2 = "0.9"
url = "2"
nom-bibtex = "0.3"
errors = { path = "../errors" }
utils = { path = "../utils" }

View file

@ -28,6 +28,7 @@ enum OutputFormat {
Toml,
Json,
Csv,
Bibtex,
Plain,
}
@ -51,6 +52,7 @@ impl FromStr for OutputFormat {
"toml" => Ok(OutputFormat::Toml),
"csv" => Ok(OutputFormat::Csv),
"json" => Ok(OutputFormat::Json),
"bibtex" => Ok(OutputFormat::Bibtex),
"plain" => Ok(OutputFormat::Plain),
format => Err(format!("Unknown output format {}", format).into()),
}
@ -63,6 +65,7 @@ impl OutputFormat {
OutputFormat::Json => "application/json",
OutputFormat::Csv => "text/csv",
OutputFormat::Toml => "application/toml",
OutputFormat::Bibtex => "application/x-bibtex",
OutputFormat::Plain => "text/plain",
})
}
@ -148,7 +151,7 @@ fn get_output_format_from_args(
let format_arg = optional_arg!(
String,
args.get("format"),
"`load_data`: `format` needs to be an argument with a string value, being one of the supported `load_data` file types (csv, json, toml, plain)"
"`load_data`: `format` needs to be an argument with a string value, being one of the supported `load_data` file types (csv, json, toml, bibtex, plain)"
);
if let Some(format) = format_arg {
@ -169,7 +172,7 @@ fn get_output_format_from_args(
}
/// A Tera function to load data from a file or from a URL
/// Currently the supported formats are json, toml, csv and plain text
/// Currently the supported formats are json, toml, csv, bibtex and plain text
#[derive(Debug)]
pub struct LoadData {
base_path: PathBuf,
@ -223,6 +226,7 @@ impl TeraFn for LoadData {
OutputFormat::Toml => load_toml(data),
OutputFormat::Csv => load_csv(data),
OutputFormat::Json => load_json(data),
OutputFormat::Bibtex => load_bibtex(data),
OutputFormat::Plain => to_value(data).map_err(|e| e.into()),
};
@ -252,6 +256,51 @@ fn load_toml(toml_data: String) -> Result<Value> {
}
}
/// Parse a BIBTEX string and convert it to a Tera Value
fn load_bibtex(bibtex_data: String) -> Result<Value> {
let bibtex_model = nom_bibtex::Bibtex::parse(&bibtex_data).map_err(|e| format!("{:?}", e))?;
let mut bibtex_map = Map::new();
let preambles_array = bibtex_model.preambles()
.iter()
.map(|v| Value::String(v.to_string()))
.collect();
bibtex_map.insert(String::from("preambles"), Value::Array(preambles_array));
let comments_array = bibtex_model.comments()
.iter()
.map(|v| Value::String(v.to_string()))
.collect();
bibtex_map.insert(String::from("comments"), Value::Array(comments_array));
let mut variables_map = Map::new();
for (key,val) in bibtex_model.variables() {
variables_map.insert(key.to_string(), Value::String(val.to_string()));
}
bibtex_map.insert(String::from("variables"), Value::Object(variables_map));
let bibliographies_array = bibtex_model.bibliographies()
.iter()
.map(|b| {
let mut m = Map::new();
m.insert(String::from("entry_type"), Value::String(b.entry_type().to_string()));
m.insert(String::from("citation_key"), Value::String(b.citation_key().to_string()));
let mut tags = Map::new();
for (key, val) in b.tags() {
tags.insert(key.to_lowercase().to_string(), Value::String(val.to_string()));
}
m.insert(String::from("tags"), Value::Object(tags));
Value::Object(m)
})
.collect();
bibtex_map.insert(String::from("bibliographies"), Value::Array(bibliographies_array));
let bibtex_value: Value = Value::Object(bibtex_map);
to_value(bibtex_value).map_err(|err| err.into())
}
/// Parse a CSV string and convert it to a Tera Value
///
/// An example csv file `example.csv` could be:

View file

@ -202,7 +202,7 @@ items: Array<TaxonomyTerm>;
See the [Taxonomies documentation](@/documentation/templates/taxonomies.md) for full documentation of those types.
### `load_data`
Loads data from a file or URL. Supported file types include *toml*, *json* and *csv*.
Loads data from a file or URL. Supported file types include *toml*, *json*, *csv* and *bibtex*.
Any other file type will be loaded as plain text.
The `path` argument specifies the path to the data file relative to your base directory, where your `config.toml` is.
@ -213,7 +213,7 @@ As a security precaution, if this file is outside the main site directory, your
```
The optional `format` argument allows you to specify and override which data type is contained
within the file specified in the `path` argument. Valid entries are `toml`, `json`, `csv`
within the file specified in the `path` argument. Valid entries are `toml`, `json`, `csv`, `bibtex`
or `plain`. If the `format` argument isn't specified, then the path extension is used.
```jinja2
@ -251,6 +251,58 @@ template:
}
```
The `bibtex` format loads data into a structure matching the format used by the
[nom-bibtex crate](https://crates.io/crates/nom-bibtex). The following is an example of data
in bibtex format:
```
@preamble{"A bibtex preamble" # " this is."}
@Comment{
Here is a comment.
}
Another comment!
@string(name = "Vincent Prouillet")
@string(github = "https://github.com/getzola/zola")
@misc {my_citation_key,
author= name,
title = "Zola",
note = "github: " # github
}
```
The following is the json-equivalent format of the produced bibtex data structure:
```json
{
"preambles": ["A bibtex preamble this is."],
"comments": ["Here is a comment.", "Another comment!"],
"variables": {
"name": "Vincent Prouillet",
"github": "https://github.com/getzola/zola"
},
"bibliographies": [
{
"entry_type": "misc",
"citation_key": "my_citation_key",
"tags": {
"author": "Vincent Prouillet",
"title": "Zola",
"note": "github: https://github.com/getzola/zola"
}
}
]
}
```
Finally, the bibtex data can be accessed from the template as follows:
```jinja2
{% set tags = data.bibliographies[0].tags %}
This was generated using {{ tags.title }}, authored by {{ tags.author }}.
```
#### Remote content
Instead of using a file, you can load data from a remote URL. This can be done by specifying a `url` parameter