Fix/check format of checker

This commit is contained in:
Tom Parker-Shemilt 2021-10-16 21:56:51 +01:00
parent c964fcbab4
commit 8537eec865
3 changed files with 127 additions and 79 deletions

View file

@@ -20,6 +20,7 @@ jobs:
profile: minimal profile: minimal
toolchain: stable toolchain: stable
override: true override: true
components: rustfmt
- uses: Swatinem/rust-cache@v1 - uses: Swatinem/rust-cache@v1
- name: Get random cache id - name: Get random cache id
run: echo "CACHE_ID=$((RANDOM))" >> $GITHUB_ENV run: echo "CACHE_ID=$((RANDOM))" >> $GITHUB_ENV
@@ -32,6 +33,8 @@ jobs:
results-${{ hashFiles('Cargo.lock') }}-${{ hashFiles('README.md') }}- results-${{ hashFiles('Cargo.lock') }}-${{ hashFiles('README.md') }}-
results-${{ hashFiles('Cargo.lock') }}- results-${{ hashFiles('Cargo.lock') }}-
results- results-
- name: Check Format
run: cargo fmt -- --check
- name: Build - name: Build
run: cargo build run: cargo build
- name: Run - name: Run

View file

@@ -40,5 +40,6 @@ fn main() {
let fixed_contents = fix_dashes(lines); let fixed_contents = fix_dashes(lines);
// Write the awesome file. // Write the awesome file.
fs::write("README.md", fixed_contents.join("\n").as_bytes()).expect("Failed to write to the file"); fs::write("README.md", fixed_contents.join("\n").as_bytes())
.expect("Failed to write to the file");
} }

View file

@@ -1,21 +1,21 @@
use pulldown_cmark::{Parser, Event, Tag}; use chrono::{DateTime, Duration, Local};
use diffy::create_patch;
use failure::{format_err, Error, Fail};
use futures::future::{select_all, BoxFuture, FutureExt};
use lazy_static::lazy_static;
use log::{debug, info, warn};
use pulldown_cmark::{Event, Parser, Tag};
use regex::Regex;
use reqwest::{header, redirect::Policy, Client, StatusCode, Url};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
use std::env;
use std::io::Write;
use std::time;
use std::u8; use std::u8;
use std::{cmp::Ordering, fs}; use std::{cmp::Ordering, fs};
use futures::future::{select_all, BoxFuture, FutureExt};
use std::collections::{BTreeSet, BTreeMap};
use serde::{Serialize, Deserialize};
use lazy_static::lazy_static;
use std::time;
use log::{warn, debug, info};
use std::io::Write;
use reqwest::{Client, redirect::Policy, StatusCode, header, Url};
use regex::Regex;
use failure::{Fail, Error, format_err};
use chrono::{Local, DateTime, Duration};
use std::env;
use tokio::sync::Semaphore; use tokio::sync::Semaphore;
use tokio::sync::SemaphorePermit; use tokio::sync::SemaphorePermit;
use diffy::create_patch;
const MINIMUM_GITHUB_STARS: u32 = 50; const MINIMUM_GITHUB_STARS: u32 = 50;
const MINIMUM_CARGO_DOWNLOADS: u32 = 2000; const MINIMUM_CARGO_DOWNLOADS: u32 = 2000;
@@ -79,9 +79,7 @@ enum CheckerError {
TooManyRequests, TooManyRequests,
#[fail(display = "reqwest error: {}", error)] #[fail(display = "reqwest error: {}", error)]
ReqwestError { ReqwestError { error: String },
error: String,
},
#[fail(display = "travis build is unknown")] #[fail(display = "travis build is unknown")]
TravisBuildUnknown, TravisBuildUnknown,
@@ -92,16 +90,14 @@ enum CheckerError {
fn formatter(err: &CheckerError, url: &String) -> String { fn formatter(err: &CheckerError, url: &String) -> String {
match err { match err {
CheckerError::HttpError {status, location} => { CheckerError::HttpError { status, location } => match location {
match location {
Some(loc) => { Some(loc) => {
format!("[{}] {} -> {}", status, url, loc) format!("[{}] {} -> {}", status, url, loc)
} }
None => { None => {
format!("[{}] {}", status, url) format!("[{}] {}", status, url)
} }
} },
}
CheckerError::TravisBuildUnknown => { CheckerError::TravisBuildUnknown => {
format!("[Unknown travis build] {}", url) format!("[Unknown travis build] {}", url)
} }
@@ -115,16 +111,18 @@ fn formatter(err: &CheckerError, url: &String) -> String {
} }
struct MaxHandles { struct MaxHandles {
remaining: Semaphore remaining: Semaphore,
} }
struct Handle<'a> { struct Handle<'a> {
_permit: SemaphorePermit<'a> _permit: SemaphorePermit<'a>,
} }
impl MaxHandles { impl MaxHandles {
fn new(max: usize) -> MaxHandles { fn new(max: usize) -> MaxHandles {
MaxHandles { remaining: Semaphore::new(max) } MaxHandles {
remaining: Semaphore::new(max),
}
} }
async fn get<'a>(&'a self) -> Handle<'a> { async fn get<'a>(&'a self) -> Handle<'a> {
@@ -157,25 +155,29 @@ fn get_url(url: String) -> BoxFuture<'static, (String, Result<(), CheckerError>)
async move { async move {
let _handle = HANDLES.get().await; let _handle = HANDLES.get().await;
return get_url_core(url).await; return get_url_core(url).await;
}.boxed() }
.boxed()
} }
lazy_static! { lazy_static! {
static ref GITHUB_REPO_REGEX: Regex = Regex::new(r"^https://github.com/(?P<org>[^/]+)/(?P<repo>[^/]+)/?$").unwrap(); static ref GITHUB_REPO_REGEX: Regex =
Regex::new(r"^https://github.com/(?P<org>[^/]+)/(?P<repo>[^/]+)/?$").unwrap();
static ref GITHUB_API_REGEX: Regex = Regex::new(r"https://api.github.com/").unwrap(); static ref GITHUB_API_REGEX: Regex = Regex::new(r"https://api.github.com/").unwrap();
static ref CRATE_REGEX: Regex = Regex::new(r"https://crates.io/crates/(?P<crate>[^/]+)/?$").unwrap(); static ref CRATE_REGEX: Regex =
Regex::new(r"https://crates.io/crates/(?P<crate>[^/]+)/?$").unwrap();
} }
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
struct GithubStars { struct GithubStars {
stargazers_count: u32 stargazers_count: u32,
} }
async fn get_stars(github_url: &str) -> Option<u32> { async fn get_stars(github_url: &str) -> Option<u32> {
warn!("Downloading Github stars for {}", github_url); warn!("Downloading Github stars for {}", github_url);
let rewritten = GITHUB_REPO_REGEX.replace_all(&github_url, "https://api.github.com/repos/$org/$repo").to_string(); let rewritten = GITHUB_REPO_REGEX
let mut req = CLIENT .replace_all(&github_url, "https://api.github.com/repos/$org/$repo")
.get(&rewritten); .to_string();
let mut req = CLIENT.get(&rewritten);
if let Ok(username) = env::var("GITHUB_USERNAME") { if let Ok(username) = env::var("GITHUB_USERNAME") {
if let Ok(password) = env::var("GITHUB_TOKEN") { if let Ok(password) = env::var("GITHUB_TOKEN") {
// needs a token with at least public_repo scope // needs a token with at least public_repo scope
@@ -204,20 +206,21 @@ async fn get_stars(github_url: &str) -> Option<u32> {
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
struct CrateInfo { struct CrateInfo {
downloads: u64 downloads: u64,
} }
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
struct Crate { struct Crate {
#[serde(rename = "crate")] #[serde(rename = "crate")]
info: CrateInfo info: CrateInfo,
} }
async fn get_downloads(github_url: &str) -> Option<u64> { async fn get_downloads(github_url: &str) -> Option<u64> {
warn!("Downloading Crates downloads for {}", github_url); warn!("Downloading Crates downloads for {}", github_url);
let rewritten = CRATE_REGEX.replace_all(&github_url, "https://crates.io/api/v1/crates/$crate").to_string(); let rewritten = CRATE_REGEX
let req = CLIENT .replace_all(&github_url, "https://crates.io/api/v1/crates/$crate")
.get(&rewritten); .to_string();
let req = CLIENT.get(&rewritten);
let resp = req.send().await; let resp = req.send().await;
match resp { match resp {
@@ -351,7 +354,7 @@ fn get_url_core(url: String) -> BoxFuture<'static, (String, Result<(), CheckerEr
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
enum Working { enum Working {
Yes, Yes,
No(CheckerError) No(CheckerError),
} }
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
@@ -366,7 +369,7 @@ type Results = BTreeMap<String, Link>;
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
struct PopularityData { struct PopularityData {
pub github_stars: BTreeMap<String, u32>, pub github_stars: BTreeMap<String, u32>,
pub cargo_downloads: BTreeMap<String, u32> pub cargo_downloads: BTreeMap<String, u32>,
} }
#[tokio::main] #[tokio::main]
@@ -384,11 +387,16 @@ async fn main() -> Result<(), Error> {
let mut popularity_data: PopularityData = fs::read_to_string("results/popularity.yaml") let mut popularity_data: PopularityData = fs::read_to_string("results/popularity.yaml")
.map_err(|e| format_err!("{}", e)) .map_err(|e| format_err!("{}", e))
.and_then(|x| serde_yaml::from_str(&x).map_err(|e| format_err!("{}", e))) .and_then(|x| serde_yaml::from_str(&x).map_err(|e| format_err!("{}", e)))
.unwrap_or(PopularityData { github_stars: BTreeMap::new(), cargo_downloads: BTreeMap::new()}); .unwrap_or(PopularityData {
github_stars: BTreeMap::new(),
cargo_downloads: BTreeMap::new(),
});
// Overrides for popularity count, reasons at the top of the file // Overrides for popularity count, reasons at the top of the file
for url in POPULARITY_OVERRIDES.iter() { for url in POPULARITY_OVERRIDES.iter() {
popularity_data.github_stars.insert(url.clone(), MINIMUM_GITHUB_STARS); popularity_data
.github_stars
.insert(url.clone(), MINIMUM_GITHUB_STARS);
} }
let mut url_checks = vec![]; let mut url_checks = vec![];
@@ -420,7 +428,7 @@ async fn main() -> Result<(), Error> {
#[derive(Debug)] #[derive(Debug)]
struct ListInfo { struct ListInfo {
location: usize, location: usize,
data: Vec<String> data: Vec<String>,
} }
let mut list_items: Vec<ListInfo> = Vec::new(); let mut list_items: Vec<ListInfo> = Vec::new();
@@ -447,29 +455,37 @@ async fn main() -> Result<(), Error> {
// Use existing star data, but re-retrieve url to check aliveness // Use existing star data, but re-retrieve url to check aliveness
// Some will have overrides, so don't check the regex yet // Some will have overrides, so don't check the regex yet
github_stars = Some(*stars) github_stars = Some(*stars)
} } else if GITHUB_REPO_REGEX.is_match(&url) && existing.is_none() {
else if GITHUB_REPO_REGEX.is_match(&url) && existing.is_none() {
github_stars = get_stars(&url).await; github_stars = get_stars(&url).await;
if let Some(raw_stars) = github_stars { if let Some(raw_stars) = github_stars {
popularity_data.github_stars.insert(new_url, raw_stars); popularity_data.github_stars.insert(new_url, raw_stars);
if raw_stars >= required_stars { if raw_stars >= required_stars {
fs::write("results/popularity.yaml", serde_yaml::to_string(&popularity_data)?)?; fs::write(
"results/popularity.yaml",
serde_yaml::to_string(&popularity_data)?,
)?;
} }
} }
link_count += 1; link_count += 1;
continue; continue;
} } else if CRATE_REGEX.is_match(&url) {
else if CRATE_REGEX.is_match(&url) {
let existing = popularity_data.cargo_downloads.get(&new_url); let existing = popularity_data.cargo_downloads.get(&new_url);
if let Some(downloads) = existing { if let Some(downloads) = existing {
cargo_downloads = Some(*downloads); cargo_downloads = Some(*downloads);
} else { } else {
let raw_downloads = get_downloads(&url).await; let raw_downloads = get_downloads(&url).await;
if let Some(positive_downloads) = raw_downloads { if let Some(positive_downloads) = raw_downloads {
cargo_downloads = Some(positive_downloads.clamp(0, u32::MAX as u64) as u32); cargo_downloads = Some(
popularity_data.cargo_downloads.insert(new_url, cargo_downloads.unwrap()); positive_downloads.clamp(0, u32::MAX as u64) as u32,
);
popularity_data
.cargo_downloads
.insert(new_url, cargo_downloads.unwrap());
if cargo_downloads.unwrap_or(0) >= MINIMUM_CARGO_DOWNLOADS { if cargo_downloads.unwrap_or(0) >= MINIMUM_CARGO_DOWNLOADS {
fs::write("results/popularity.yaml", serde_yaml::to_string(&popularity_data)?)?; fs::write(
"results/popularity.yaml",
serde_yaml::to_string(&popularity_data)?,
)?;
} }
} }
link_count += 1; link_count += 1;
@@ -486,7 +502,10 @@ async fn main() -> Result<(), Error> {
list_items.last_mut().unwrap().data.push(list_item.clone()); list_items.last_mut().unwrap().data.push(list_item.clone());
in_list_item = false; in_list_item = false;
} }
list_items.push(ListInfo {location: range.start, data: Vec::new()}); list_items.push(ListInfo {
location: range.start,
data: Vec::new(),
});
} }
Tag::Item => { Tag::Item => {
if in_list_item && list_item.len() > 0 { if in_list_item && list_item.len() > 0 {
@@ -531,7 +550,9 @@ async fn main() -> Result<(), Error> {
Tag::Item => { Tag::Item => {
if list_item.len() > 0 { if list_item.len() > 0 {
if link_count > 0 { if link_count > 0 {
if github_stars.unwrap_or(0) < required_stars && cargo_downloads.unwrap_or(0) < MINIMUM_CARGO_DOWNLOADS { if github_stars.unwrap_or(0) < required_stars
&& cargo_downloads.unwrap_or(0) < MINIMUM_CARGO_DOWNLOADS
{
if github_stars.is_none() { if github_stars.is_none() {
warn!("No valid github link"); warn!("No valid github link");
} }
@@ -548,9 +569,11 @@ async fn main() -> Result<(), Error> {
} }
Tag::List(_) => { Tag::List(_) => {
let list_info = list_items.pop().unwrap(); let list_info = list_items.pop().unwrap();
if list_info.data.iter().find(|s| *s == "License").is_some() && list_info.data.iter().find(|s| *s == "Resources").is_some() { if list_info.data.iter().find(|s| *s == "License").is_some()
&& list_info.data.iter().find(|s| *s == "Resources").is_some()
{
// Ignore wrong ordering in top-level list // Ignore wrong ordering in top-level list
continue continue;
} }
let mut sorted_recent_list = list_info.data.to_vec(); let mut sorted_recent_list = list_info.data.to_vec();
sorted_recent_list.sort_by(|a, b| a.to_lowercase().cmp(&b.to_lowercase())); sorted_recent_list.sort_by(|a, b| a.to_lowercase().cmp(&b.to_lowercase()));
@@ -566,12 +589,18 @@ async fn main() -> Result<(), Error> {
} }
} }
Event::Html(content) => { Event::Html(content) => {
return Err(format_err!("Contains HTML content, not markdown: {}", content)); return Err(format_err!(
"Contains HTML content, not markdown: {}",
content
));
} }
_ => {} _ => {}
} }
} }
fs::write("results/popularity.yaml", serde_yaml::to_string(&popularity_data)?)?; fs::write(
"results/popularity.yaml",
serde_yaml::to_string(&popularity_data)?,
)?;
to_check.sort_by(|a, b| { to_check.sort_by(|a, b| {
let get_time = |k| { let get_time = |k| {
@@ -626,24 +655,30 @@ async fn main() -> Result<(), Error> {
link.last_working = Some(Local::now()); link.last_working = Some(Local::now());
link.working = Working::Yes; link.working = Working::Yes;
} else { } else {
results.insert(url.clone(), Link { results.insert(
url.clone(),
Link {
updated_at: Local::now(), updated_at: Local::now(),
last_working: Some(Local::now()), last_working: Some(Local::now()),
working: Working::Yes working: Working::Yes,
});
}
}, },
);
}
}
Err(err) => { Err(err) => {
print!("\u{2718} "); print!("\u{2718} ");
if let Some(link) = results.get_mut(&url) { if let Some(link) = results.get_mut(&url) {
link.updated_at = Local::now(); link.updated_at = Local::now();
link.working = Working::No(err); link.working = Working::No(err);
} else { } else {
results.insert(url.clone(), Link { results.insert(
url.clone(),
Link {
updated_at: Local::now(), updated_at: Local::now(),
working: Working::No(err), working: Working::No(err),
last_working: None last_working: None,
}); },
);
} }
} }
} }
@@ -664,7 +699,9 @@ async fn main() -> Result<(), Error> {
for (url, link) in results.iter() { for (url, link) in results.iter() {
if let Working::No(ref err) = link.working { if let Working::No(ref err) = link.working {
match err { match err {
CheckerError::HttpError {status, ..} if *status == 301 || *status == 302 || *status == 404 => { CheckerError::HttpError { status, .. }
if *status == 301 || *status == 302 || *status == 404 =>
{
println!("{} {:?}", url, link); println!("{} {:?}", url, link);
failed += 1; failed += 1;
continue; continue;
@@ -672,7 +709,10 @@ async fn main() -> Result<(), Error> {
CheckerError::TooManyRequests => { CheckerError::TooManyRequests => {
// too many tries // too many tries
if link.last_working.is_some() { if link.last_working.is_some() {
info!("Ignoring 429 failure on {} as we've seen success before", url); info!(
"Ignoring 429 failure on {} as we've seen success before",
url
);
continue; continue;
} }
} }
@@ -684,7 +724,11 @@ async fn main() -> Result<(), Error> {
println!("{} {:?}", url, link); println!("{} {:?}", url, link);
failed += 1; failed += 1;
} else { } else {
println!("Failure occurred but only {}, so we're not worrying yet: {}", chrono_humanize::HumanTime::from(-since), formatter(err, url)); println!(
"Failure occurred but only {}, so we're not worrying yet: {}",
chrono_humanize::HumanTime::from(-since),
formatter(err, url)
);
} }
} else { } else {
println!("{} {:?}", url, link); println!("{} {:?}", url, link);