mirror of
https://github.com/rust-unofficial/awesome-rust
synced 2024-11-10 06:14:13 +00:00
Look for links/images in HTML content in the Markdown
This commit is contained in:
parent
a37b1cfee3
commit
6cb9433de0
2 changed files with 36 additions and 14 deletions
|
@ -17,3 +17,4 @@ env_logger = "0.7"
|
||||||
async-std = "1"
|
async-std = "1"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
regex = "1"
|
regex = "1"
|
||||||
|
scraper = "0.11"
|
47
src/main.rs
47
src/main.rs
|
@ -11,7 +11,7 @@ use log::{warn, debug};
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use reqwest::{Client, redirect::Policy, StatusCode, header};
|
use reqwest::{Client, redirect::Policy, StatusCode, header};
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
use scraper::{Html, Selector};
|
||||||
use failure::{Fail, Error, format_err};
|
use failure::{Fail, Error, format_err};
|
||||||
|
|
||||||
#[derive(Debug, Fail)]
|
#[derive(Debug, Fail)]
|
||||||
|
@ -155,22 +155,43 @@ async fn main() -> Result<(), Error> {
|
||||||
|
|
||||||
let mut url_checks = vec![];
|
let mut url_checks = vec![];
|
||||||
|
|
||||||
|
let mut do_check = |url: String| {
|
||||||
|
if !url.starts_with("http") {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if results.working.contains(&url) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let check = get_url(url).boxed();
|
||||||
|
url_checks.push(check);
|
||||||
|
};
|
||||||
|
|
||||||
for (event, _range) in parser.into_offset_iter() {
|
for (event, _range) in parser.into_offset_iter() {
|
||||||
if let Event::Start(tag) = event {
|
match event {
|
||||||
match tag {
|
Event::Start(tag) => {
|
||||||
Tag::Link(_link_type, url, _title) | Tag::Image(_link_type, url, _title) => {
|
match tag {
|
||||||
if !url.starts_with("http") {
|
Tag::Link(_link_type, url, _title) | Tag::Image(_link_type, url, _title) => {
|
||||||
continue;
|
do_check(url.to_string());
|
||||||
}
|
}
|
||||||
let url_string = url.to_string();
|
_ => {}
|
||||||
if results.working.contains(&url_string) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let check = get_url(url_string).boxed();
|
|
||||||
url_checks.push(check);
|
|
||||||
}
|
}
|
||||||
_ => {}
|
|
||||||
}
|
}
|
||||||
|
Event::Html(content) => {
|
||||||
|
let fragment = Html::parse_fragment(&content);
|
||||||
|
for element in fragment.select(&Selector::parse("img").unwrap()) {
|
||||||
|
let img_src = element.value().attr("src");
|
||||||
|
if let Some(src) = img_src {
|
||||||
|
do_check(src.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for element in fragment.select(&Selector::parse("a").unwrap()) {
|
||||||
|
let a_href = element.value().attr("href");
|
||||||
|
if let Some(href) = a_href {
|
||||||
|
do_check(href.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue