Updated icon downloading

- Added more checks to prevent panics (Removed unwrap)
- Try do download from base domain or add www when the provided domain
  fails
- Added some more domain validation checks to prevent errors
- Added the ICON_BLACKLIST_REGEX to a Lazy Static HashMap which
  speeds-up the checks!
- Validate the Regex before starting/config change.
- Some cleanups
- Disabled some noisy debugging from 2 crates.
This commit is contained in:
BlackDex 2020-12-08 17:33:15 +01:00
parent 6530904883
commit 48baf723a4
4 changed files with 138 additions and 40 deletions

View file

@ -104,7 +104,8 @@
## Icon blacklist Regex ## Icon blacklist Regex
## Any domains or IPs that match this regex won't be fetched by the icon service. ## Any domains or IPs that match this regex won't be fetched by the icon service.
## Useful to hide other servers in the local network. Check the WIKI for more details ## Useful to hide other servers in the local network. Check the WIKI for more details
# ICON_BLACKLIST_REGEX=192\.168\.1\.[0-9].*^ ## NOTE: Always enclose this regex withing single quotes!
# ICON_BLACKLIST_REGEX='^(192\.168\.0\.[0-9]+|192\.168\.1\.[0-9]+)$'
## Any IP which is not defined as a global IP will be blacklisted. ## Any IP which is not defined as a global IP will be blacklisted.
## Useful to secure your internal environment: See https://en.wikipedia.org/wiki/Reserved_IP_addresses for a list of IPs which it will block ## Useful to secure your internal environment: See https://en.wikipedia.org/wiki/Reserved_IP_addresses for a list of IPs which it will block

View file

@ -1,7 +1,9 @@
use std::{ use std::{
collections::HashMap,
fs::{create_dir_all, remove_file, symlink_metadata, File}, fs::{create_dir_all, remove_file, symlink_metadata, File},
io::prelude::*, io::prelude::*,
net::{IpAddr, ToSocketAddrs}, net::{IpAddr, ToSocketAddrs},
sync::RwLock,
time::{Duration, SystemTime}, time::{Duration, SystemTime},
}; };
@ -28,20 +30,48 @@ static CLIENT: Lazy<Client> = Lazy::new(|| {
.unwrap() .unwrap()
}); });
static ICON_REL_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"icon$|apple.*icon").unwrap()); // Build Regex only once since this takes a lot of time.
static ICON_HREF_REGEX: Lazy<Regex> = static ICON_REL_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)icon$|apple.*icon").unwrap());
Lazy::new(|| Regex::new(r"(?i)\w+\.(jpg|jpeg|png|ico)(\?.*)?$|^data:image.*base64").unwrap());
static ICON_SIZE_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?x)(\d+)\D*(\d+)").unwrap()); static ICON_SIZE_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?x)(\d+)\D*(\d+)").unwrap());
// Special HashMap which holds the user defined Regex to speedup matching the regex.
static ICON_BLACKLIST_REGEX: Lazy<RwLock<HashMap<String, Regex>>> = Lazy::new(|| RwLock::new(HashMap::new()));
#[get("/<domain>/icon.png")]
fn icon(domain: String) -> Option<Cached<Content<Vec<u8>>>> {
if !is_valid_domain(&domain) {
warn!("Invalid domain: {}", domain);
return None;
}
get_icon(&domain).map(|icon| Cached::long(Content(ContentType::new("image", "x-icon"), icon)))
}
/// Returns if the domain provided is valid or not.
///
/// This does some manual checks and makes use of Url to do some basic checking.
/// domains can't be larger then 63 characters (not counting multiple subdomains) according to the RFC's, but we limit the total size to 255.
fn is_valid_domain(domain: &str) -> bool { fn is_valid_domain(domain: &str) -> bool {
// Don't allow empty or too big domains or path traversal // If parsing the domain fails using Url, it will not work with reqwest.
if domain.is_empty() || domain.len() > 255 || domain.contains("..") { if let Err(parse_error) = Url::parse(format!("https://{}", domain).as_str()) {
debug!("Domain parse error: '{}' - {:?}", domain, parse_error);
return false;
} else if domain.is_empty()
|| domain.contains("..")
|| domain.starts_with('.')
|| domain.starts_with('-')
|| domain.ends_with('-')
{
debug!("Domain validation error: '{}' is either empty, contains '..', starts with an '.', starts or ends with a '-'", domain);
return false;
} else if domain.len() > 255 {
debug!("Domain validation error: '{}' exceeds 255 characters", domain);
return false; return false;
} }
// Only alphanumeric or specific characters
for c in domain.chars() { for c in domain.chars() {
if !c.is_alphanumeric() && !ALLOWED_CHARS.contains(c) { if !c.is_alphanumeric() && !ALLOWED_CHARS.contains(c) {
debug!("Domain validation error: '{}' contains an invalid character '{}'", domain, c);
return false; return false;
} }
} }
@ -49,21 +79,10 @@ fn is_valid_domain(domain: &str) -> bool {
true true
} }
#[get("/<domain>/icon.png")]
fn icon(domain: String) -> Option<Cached<Content<Vec<u8>>>> {
if !is_valid_domain(&domain) {
warn!("Invalid domain: {:#?}", domain);
return None;
}
get_icon(&domain).map(|icon| {
Cached::long(Content(ContentType::new("image", "x-icon"), icon))
})
}
/// TODO: This is extracted from IpAddr::is_global, which is unstable: /// TODO: This is extracted from IpAddr::is_global, which is unstable:
/// https://doc.rust-lang.org/nightly/std/net/enum.IpAddr.html#method.is_global /// https://doc.rust-lang.org/nightly/std/net/enum.IpAddr.html#method.is_global
/// Remove once https://github.com/rust-lang/rust/issues/27709 is merged /// Remove once https://github.com/rust-lang/rust/issues/27709 is merged
#[allow(clippy::nonminimal_bool)]
#[cfg(not(feature = "unstable"))] #[cfg(not(feature = "unstable"))]
fn is_global(ip: IpAddr) -> bool { fn is_global(ip: IpAddr) -> bool {
match ip { match ip {
@ -159,7 +178,7 @@ mod tests {
} }
} }
fn check_icon_domain_is_blacklisted(domain: &str) -> bool { fn is_domain_blacklisted(domain: &str) -> bool {
let mut is_blacklisted = CONFIG.icon_blacklist_non_global_ips() let mut is_blacklisted = CONFIG.icon_blacklist_non_global_ips()
&& (domain, 0) && (domain, 0)
.to_socket_addrs() .to_socket_addrs()
@ -177,7 +196,31 @@ fn check_icon_domain_is_blacklisted(domain: &str) -> bool {
// Skip the regex check if the previous one is true already // Skip the regex check if the previous one is true already
if !is_blacklisted { if !is_blacklisted {
if let Some(blacklist) = CONFIG.icon_blacklist_regex() { if let Some(blacklist) = CONFIG.icon_blacklist_regex() {
let regex = Regex::new(&blacklist).expect("Valid Regex"); let mut regex_hashmap = ICON_BLACKLIST_REGEX.read().unwrap();
// Use the pre-generate Regex stored in a Lazy HashMap if there's one, else generate it.
let regex = if let Some(regex) = regex_hashmap.get(&blacklist) {
regex
} else {
drop(regex_hashmap);
let mut regex_hashmap_write = ICON_BLACKLIST_REGEX.write().unwrap();
// Clear the current list if the previous key doesn't exists.
// To prevent growing of the HashMap after someone has changed it via the admin interface.
if regex_hashmap_write.len() >= 1 {
regex_hashmap_write.clear();
}
// Generate the regex to store in too the Lazy Static HashMap.
let blacklist_regex = Regex::new(&blacklist).unwrap();
regex_hashmap_write.insert(blacklist.to_string(), blacklist_regex);
drop(regex_hashmap_write);
regex_hashmap = ICON_BLACKLIST_REGEX.read().unwrap();
regex_hashmap.get(&blacklist).unwrap()
};
// Use the pre-generate Regex stored in a Lazy HashMap.
if regex.is_match(&domain) { if regex.is_match(&domain) {
warn!("Blacklisted domain: {:#?} matched {:#?}", domain, blacklist); warn!("Blacklisted domain: {:#?} matched {:#?}", domain, blacklist);
is_blacklisted = true; is_blacklisted = true;
@ -213,8 +256,7 @@ fn get_icon(domain: &str) -> Option<Vec<u8>> {
Err(e) => { Err(e) => {
error!("Error downloading icon: {:?}", e); error!("Error downloading icon: {:?}", e);
let miss_indicator = path + ".miss"; let miss_indicator = path + ".miss";
let empty_icon = Vec::new(); save_icon(&miss_indicator, &[]);
save_icon(&miss_indicator, &empty_icon);
None None
} }
} }
@ -307,11 +349,51 @@ fn get_icon_url(domain: &str) -> Result<(Vec<Icon>, String), Error> {
// Some sites have extra security in place with for example XSRF Tokens. // Some sites have extra security in place with for example XSRF Tokens.
let mut cookie_str = String::new(); let mut cookie_str = String::new();
let resp = get_page(&ssldomain).or_else(|_| get_page(&httpdomain)); // First check the domain as given during the request for both HTTPS and HTTP.
let resp = match get_page(&ssldomain).or_else(|_| get_page(&httpdomain)) {
Ok(c) => Ok(c),
Err(e) => {
let mut sub_resp = Err(e);
// When the domain is not an IP, and has more then one dot, remove all subdomains.
let is_ip = domain.parse::<IpAddr>();
if is_ip.is_err() && domain.matches('.').count() > 1 {
let mut domain_parts = domain.split('.');
let base_domain = format!(
"{base}.{tld}",
tld = domain_parts.next_back().unwrap(),
base = domain_parts.next_back().unwrap()
);
if is_valid_domain(&base_domain) {
let sslbase = format!("https://{}", base_domain);
let httpbase = format!("http://{}", base_domain);
debug!("[get_icon_url]: Trying without subdomains '{}'", base_domain);
sub_resp = get_page(&sslbase).or_else(|_| get_page(&httpbase));
}
// When the domain is not an IP, and has less then 2 dots, try to add www. infront of it.
} else if is_ip.is_err() && domain.matches('.').count() < 2 {
let www_domain = format!("www.{}", domain);
if is_valid_domain(&www_domain) {
let sslwww = format!("https://{}", www_domain);
let httpwww = format!("http://{}", www_domain);
debug!("[get_icon_url]: Trying with www. prefix '{}'", www_domain);
sub_resp = get_page(&sslwww).or_else(|_| get_page(&httpwww));
}
}
sub_resp
}
};
if let Ok(content) = resp { if let Ok(content) = resp {
// Extract the URL from the respose in case redirects occured (like @ gitlab.com) // Extract the URL from the respose in case redirects occured (like @ gitlab.com)
let url = content.url().clone(); let url = content.url().clone();
// Get all the cookies and pass it on to the next function.
// Needed for XSRF Cookies for example (like @ mijn.ing.nl)
let raw_cookies = content.headers().get_all("set-cookie"); let raw_cookies = content.headers().get_all("set-cookie");
cookie_str = raw_cookies cookie_str = raw_cookies
.iter() .iter()
@ -337,14 +419,18 @@ fn get_icon_url(domain: &str) -> Result<(Vec<Icon>, String), Error> {
let favicons = soup let favicons = soup
.tag("link") .tag("link")
.attr("rel", ICON_REL_REGEX.clone()) // Only use icon rels .attr("rel", ICON_REL_REGEX.clone()) // Only use icon rels
.attr("href", ICON_HREF_REGEX.clone()) // Only allow specific extensions .attr_name("href") // Make sure there is a href
.find_all(); .find_all();
// Loop through all the found icons and determine it's priority // Loop through all the found icons and determine it's priority
for favicon in favicons { for favicon in favicons {
let sizes = favicon.get("sizes"); let sizes = favicon.get("sizes");
let href = favicon.get("href").expect("Missing href"); let href = favicon.get("href").unwrap();
let full_href = url.join(&href).unwrap().into_string(); // Skip invalid url's
let full_href = match url.join(&href) {
Ok(h) => h.into_string(),
_ => continue,
};
let priority = get_icon_priority(&full_href, sizes); let priority = get_icon_priority(&full_href, sizes);
@ -368,20 +454,18 @@ fn get_page(url: &str) -> Result<Response, Error> {
} }
fn get_page_with_cookies(url: &str, cookie_str: &str) -> Result<Response, Error> { fn get_page_with_cookies(url: &str, cookie_str: &str) -> Result<Response, Error> {
if check_icon_domain_is_blacklisted(Url::parse(url).unwrap().host_str().unwrap_or_default()) { if is_domain_blacklisted(Url::parse(url).unwrap().host_str().unwrap_or_default()) {
err!("Favicon rel linked to a non blacklisted domain!"); err!("Favicon rel linked to a blacklisted domain!");
} }
if cookie_str.is_empty() { let mut client = CLIENT.get(url);
CLIENT.get(url).send()?.error_for_status().map_err(Into::into) if !cookie_str.is_empty() {
} else { client = client.header("cookie", cookie_str)
CLIENT }
.get(url)
.header("cookie", cookie_str) client.send()?
.send()?
.error_for_status() .error_for_status()
.map_err(Into::into) .map_err(Into::into)
}
} }
/// Returns a Integer with the priority of the type of the icon which to prefer. /// Returns a Integer with the priority of the type of the icon which to prefer.
@ -464,7 +548,7 @@ fn parse_sizes(sizes: Option<String>) -> (u16, u16) {
} }
fn download_icon(domain: &str) -> Result<Vec<u8>, Error> { fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
if check_icon_domain_is_blacklisted(domain) { if is_domain_blacklisted(domain) {
err!("Domain is blacklisted", domain) err!("Domain is blacklisted", domain)
} }
@ -495,7 +579,7 @@ fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
res.copy_to(&mut buffer)?; res.copy_to(&mut buffer)?;
break; break;
} }
Err(_) => info!("Download failed for {}", icon.href), Err(_) => warn!("Download failed for {}", icon.href),
}; };
} }
} }

View file

@ -527,6 +527,16 @@ fn validate_config(cfg: &ConfigItems) -> Result<(), Error> {
} }
} }
// Check if the icon blacklist regex is valid
if let Some(ref r) = cfg.icon_blacklist_regex {
use regex::Regex;
let validate_regex = Regex::new(&r);
match validate_regex {
Ok(_) => (),
Err(e) => err!(format!("`ICON_BLACKLIST_REGEX` is invalid: {:#?}", e)),
}
}
Ok(()) Ok(())
} }

View file

@ -113,6 +113,9 @@ fn init_logging(level: log::LevelFilter) -> Result<(), fern::InitError> {
.level_for("launch_", log::LevelFilter::Off) .level_for("launch_", log::LevelFilter::Off)
.level_for("rocket::rocket", log::LevelFilter::Off) .level_for("rocket::rocket", log::LevelFilter::Off)
.level_for("rocket::fairing", log::LevelFilter::Off) .level_for("rocket::fairing", log::LevelFilter::Off)
// Never show html5ever and hyper::proto logs, too noisy
.level_for("html5ever", log::LevelFilter::Off)
.level_for("hyper::proto", log::LevelFilter::Off)
.chain(std::io::stdout()); .chain(std::io::stdout());
// Enable smtp debug logging only specifically for smtp when need. // Enable smtp debug logging only specifically for smtp when need.