Async reqwest (#597)

* update reqwest and tokio

* Make extract link working with new packages

* Make uniq working with new packages

* Update get.md

* Make rest-post working with new packages

* Make API resource exists working with new packages

* Make Query the GitHub API working with new packages

* Fix typos

* update to edition 2018

* Extending reqwest

* fix mime request

* fix post file

* Fix download basic

* Remove toml lines

* add error handling

* Fix partial

* Improve coding in unique

* Borken links fix

* Fix retain example

* merging master

* Completed updating for async

* https://docs.rs/reqwest/0.10.6/reqwest/ references clients and they provided 404 errors

* replace anyhow with error-chain

* spelling and links

Co-authored-by: pollosp <pollo.es.pollo@gmail.com>
Co-authored-by: Olopez <pollosp@users.noreply.github.com>
This commit is contained in:
Andrew Gauger 2020-06-06 19:12:09 -07:00 committed by GitHub
parent b61c8e588a
commit dd4efa8dcd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 379 additions and 312 deletions

View file

@ -1,11 +1,10 @@
[package]
name = "rust-cookbook"
version = "0.1.0"
authors = ["Brian Anderson <banderson@mozilla.com>"]
version = "1.0.0"
authors = ["Brian Anderson <banderson@mozilla.com>", "Andrew Gauger <andygauge@gmail.com>"]
edition = "2018"
license = "MIT/Apache-2.0"
publish = false
build = "build.rs"
[dependencies]
@ -42,19 +41,20 @@ rand = "0.7.3"
rand_distr = "0.2.2"
rayon = "1.0"
regex = "1.0"
reqwest = "0.9"
reqwest = { version = "0.10", features = ["blocking", "json", "stream"] }
ring = "0.16.11"
rusqlite = { version = "0.22", features = ["chrono"] }
same-file = "1.0"
select = "0.4"
semver = "0.9"
serde = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_derive = "1.0"
serde_json = "1.0"
tar = "0.4.12"
tempfile = "3.1"
threadpool = "1.6"
toml = "0.4"
tokio = { version = "0.2", features = ["full"] }
unicode-segmentation = "1.2.1"
url = "2.1"
walkdir = "2.0"

View file

@ -1 +0,0 @@
# Error Handling

View file

@ -6,7 +6,7 @@ The [error-chain] crate makes [matching] on different error types returned by
a function possible and relatively compact. [`ErrorKind`] determines the error
type.
Uses [reqwest] to query a random integer generator web service. Converts
Uses [reqwest]::[blocking] to query a random integer generator web service. Converts
the string response into an integer. The Rust standard library,
[reqwest], and the web service can all generate errors. Well defined Rust errors
use [`foreign_links`]. An additional [`ErrorKind`] variant for the web service
@ -15,34 +15,29 @@ error uses `errors` block of the `error_chain!` macro.
```rust,edition2018
use error_chain::error_chain;
use std::io::Read;
error_chain! {
foreign_links {
Io(std::io::Error);
Reqwest(reqwest::Error);
ParseIntError(std::num::ParseIntError);
}
errors { RandomResponseError(t: String) }
}
fn parse_response(mut response: reqwest::Response) -> Result<u32> {
let mut body = String::new();
response.read_to_string(&mut body)?;
fn parse_response(response: reqwest::blocking::Response) -> Result<u32> {
let mut body = response.text()?;
body.pop();
body.parse::<u32>()
body
.parse::<u32>()
.chain_err(|| ErrorKind::RandomResponseError(body))
}
fn run() -> Result<()> {
let url =
format!("https://www.random.org/integers/?num=1&min=0&max=10&col=1&base=10&format=plain");
let response = reqwest::get(&url)?;
let response = reqwest::blocking::get(&url)?;
let random_value: u32 = parse_response(response)?;
println!("a random number between 0 and 10: {}", random_value);
Ok(())
}
@ -61,5 +56,5 @@ fn main() {
[`ErrorKind`]: https://docs.rs/error-chain/*/error_chain/example_generated/enum.ErrorKind.html
[`foreign_links`]: https://docs.rs/error-chain/*/error_chain/#foreign-links
[blocking]: https://docs.rs/reqwest/*/reqwest/blocking/index.html
[Matching]:https://docs.rs/error-chain/*/error_chain/#matching-errors

View file

@ -26,8 +26,18 @@
| [Get MIME type from filename][ex-mime-from-filename] | [![mime-badge]][mime] | [![cat-encoding-badge]][cat-encoding] |
| [Parse the MIME type of a HTTP response][ex-http-response-mime-type] | [![mime-badge]][mime] [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] [![cat-encoding-badge]][cat-encoding] |
## Clients
{{#include web/clients.md}}
| Recipe | Crates | Categories |
|--------|--------|------------|
| [Make a HTTP GET request][ex-url-basic] | [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] |
| [Query the GitHub API][ex-rest-get] | [![reqwest-badge]][reqwest] [![serde-badge]][serde] | [![cat-net-badge]][cat-net] [![cat-encoding-badge]][cat-encoding] |
| [Check if an API resource exists][ex-rest-head] | [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] |
| [Create and delete Gist with GitHub API][ex-rest-post] | [![reqwest-badge]][reqwest] [![serde-badge]][serde] | [![cat-net-badge]][cat-net] [![cat-encoding-badge]][cat-encoding] |
| [Consume a paginated RESTful API][ex-paginated-api] | [![reqwest-badge]][reqwest] [![serde-badge]][serde] | [![cat-net-badge]][cat-net] [![cat-encoding-badge]][cat-encoding] |
| [Download a file to a temporary directory][ex-url-download] | [![reqwest-badge]][reqwest] [![tempdir-badge]][tempdir] | [![cat-net-badge]][cat-net] [![cat-filesystem-badge]][cat-filesystem] |
| [Make a partial download with HTTP range headers][ex-progress-with-range] | [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] |
| [POST a file to paste-rs][ex-file-post] | [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] |
[ex-extract-links-webpage]: web/scraping.html#extract-all-links-from-a-webpage-html
[ex-check-broken-links]: web/scraping.html#check-a-webpage-for-broken-links
@ -43,4 +53,15 @@
[ex-mime-from-filename]: web/mime.html#get-mime-type-from-filename
[ex-http-response-mime-type]: web/mime.html#parse-the-mime-type-of-a-http-response
[ex-url-basic]: web/clients/requests.html#make-a-http-get-request
[ex-rest-custom-params]: web/clients/requests.html#set-custom-headers-and-url-parameters-for-a-rest-request
[ex-rest-get]: web/clients/apis.html#query-the-github-api
[ex-rest-head]: web/clients/apis.html#check-if-an-api-resource-exists
[ex-rest-post]: web/clients/apis.html#create-and-delete-gist-with-github-api
[ex-paginated-api]: web/clients/apis.html#consume-a-paginated-restful-api
[ex-handle-rate-limited-api]: web/clients/apis.html#handle-a-rate-limited-api
[ex-url-download]: web/clients/download.html#download-a-file-to-a-temporary-directory
[ex-progress-with-range]: web/clients/download.html#make-a-partial-download-with-http-range-headers
[ex-file-post]: web/clients/download.html#post-a-file-to-paste-rs
{{#include links.md}}

View file

@ -11,15 +11,15 @@
| [Make a partial download with HTTP range headers][ex-progress-with-range] | [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] |
| [POST a file to paste-rs][ex-file-post] | [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] |
[ex-url-basic]: web/clients/requests.html#make-a-http-get-request
[ex-rest-custom-params]: web/clients/requests.html#set-custom-headers-and-url-parameters-for-a-rest-request
[ex-rest-get]: web/clients/apis.html#query-the-github-api
[ex-rest-head]: web/clients/apis.html#check-if-an-api-resource-exists
[ex-rest-post]: web/clients/apis.html#create-and-delete-gist-with-github-api
[ex-paginated-api]: web/clients/apis.html#consume-a-paginated-restful-api
[ex-handle-rate-limited-api]: web/clients/apis.html#handle-a-rate-limited-api
[ex-url-download]: web/clients/download.html#download-a-file-to-a-temporary-directory
[ex-progress-with-range]: web/clients/download.html#make-a-partial-download-with-http-range-headers
[ex-file-post]: web/clients/download.html#post-a-file-to-paste-rs
[ex-url-basic]: clients/requests.html#make-a-http-get-request
[ex-rest-custom-params]: clients/requests.html#set-custom-headers-and-url-parameters-for-a-rest-request
[ex-rest-get]: clients/apis.html#query-the-github-api
[ex-rest-head]: clients/apis.html#check-if-an-api-resource-exists
[ex-rest-post]: clients/apis.html#create-and-delete-gist-with-github-api
[ex-paginated-api]: clients/apis.html#consume-a-paginated-restful-api
[ex-handle-rate-limited-api]: clients/apis.html#handle-a-rate-limited-api
[ex-url-download]: clients/download.html#download-a-file-to-a-temporary-directory
[ex-progress-with-range]: clients/download.html#make-a-partial-download-with-http-range-headers
[ex-file-post]: clients/download.html#post-a-file-to-paste-rs
{{#include ../links.md}}

View file

@ -7,8 +7,8 @@ fetches the next page of results from the remote server as it arrives at the end
of each page.
```rust,edition2018,no_run
use reqwest::Result;
use serde::Deserialize;
use reqwest::Error;
#[derive(Deserialize)]
struct ApiResponse {
@ -29,25 +29,25 @@ struct Meta {
struct ReverseDependencies {
crate_id: String,
dependencies: <Vec<Dependency> as IntoIterator>::IntoIter,
client: reqwest::Client,
client: reqwest::blocking::Client,
page: u32,
per_page: u32,
total: u32,
}
impl ReverseDependencies {
fn of(crate_id: &str) -> Result<Self, Error> {
fn of(crate_id: &str) -> Result<Self> {
Ok(ReverseDependencies {
crate_id: crate_id.to_owned(),
dependencies: vec![].into_iter(),
client: reqwest::Client::new(),
client: reqwest::blocking::Client::new(),
page: 0,
per_page: 100,
total: 0,
})
}
fn try_next(&mut self) -> Result<Option<Dependency>, Error> {
fn try_next(&mut self) -> Result<Option<Dependency>> {
if let Some(dep) = self.dependencies.next() {
return Ok(Some(dep));
}
@ -70,7 +70,7 @@ impl ReverseDependencies {
}
impl Iterator for ReverseDependencies {
type Item = Result<Dependency, Error>;
type Item = Result<Dependency>;
fn next(&mut self) -> Option<Self::Item> {
match self.try_next() {
@ -81,7 +81,7 @@ impl Iterator for ReverseDependencies {
}
}
fn main() -> Result<(), Error> {
fn main() -> Result<()> {
for dep in ReverseDependencies::of("serde")? {
println!("reverse dependency: {}", dep?.crate_id);
}

View file

@ -3,7 +3,11 @@
[![reqwest-badge]][reqwest] [![serde-badge]][serde] [![cat-net-badge]][cat-net] [![cat-encoding-badge]][cat-encoding]
Queries GitHub [stargazers API v3](https://developer.github.com/v3/activity/starring/#list-stargazers)
with [`reqwest::get`] to get list of all users who have marked a GitHub project with a star. [`reqwest::Response`] is deserialized with [`Response::json`] into `User` objects implementing [`serde::Deserialize`].
with [`reqwest::get`] to get the list of all users who have marked a GitHub project with a star.
[`reqwest::Response`] is deserialized with [`Response::json`] into `User` objects implementing [`serde::Deserialize`].
[tokio::main] is used to set up the async executor and the process waits for [`reqwest::get`] to complete before
processing the response into User instances.
```rust,edition2018,no_run
use serde::Deserialize;
@ -15,14 +19,15 @@ struct User {
id: u32,
}
fn main() -> Result<(), Error> {
#[tokio::main]
async fn main() -> Result<(), Error> {
let request_url = format!("https://api.github.com/repos/{owner}/{repo}/stargazers",
owner = "rust-lang-nursery",
repo = "rust-cookbook");
println!("{}", request_url);
let mut response = reqwest::get(&request_url)?;
let response = reqwest::get(&request_url).await?;
let users: Vec<User> = response.json()?;
let users: Vec<User> = response.json().await?;
println!("{:?}", users);
Ok(())
}

View file

@ -8,21 +8,23 @@ success. This is a quick way to query a rest resource without needing to receive
a body. [`reqwest::Client`] configured with [`ClientBuilder::timeout`] ensures
a request will not last longer than a timeout.
```rust,edition2018,no_run
Due to both [`ClientBuilder::build`] and [`RequestBuilder::send`] returning [`reqwest::Error`]
types, the shortcut [`reqwest::Result`] is used for the main function return type.
use reqwest::Error;
```rust,edition2018,no_run
use reqwest::Result;
use std::time::Duration;
use reqwest::ClientBuilder;
fn main() -> Result<(), Error> {
#[tokio::main]
async fn main() -> Result<()> {
let user = "ferris-the-crab";
let request_url = format!("https://api.github.com/users/{}", user);
println!("{}", request_url);
let timeout = Duration::new(5, 0);
let client = ClientBuilder::new().timeout(timeout).build()?;
let response = client.head(&request_url).send()?;
let response = client.head(&request_url).send().await?;
if response.status().is_success() {
println!("{} is a user!", user);
@ -34,6 +36,10 @@ fn main() -> Result<(), Error> {
}
```
[`ClientBuilder::build`]: https://docs.rs/reqwest/*/reqwest/struct.ClientBuilder.html#method.build
[`Client::head`]: https://docs.rs/reqwest/*/reqwest/struct.Client.html#method.head
[`ClientBuilder::timeout`]: https://docs.rs/reqwest/*/reqwest/struct.ClientBuilder.html#method.timeout
[`RequestBuilder::send`]: https://docs.rs/reqwest/*/reqwest/struct.RequestBuilder.html#method.send
[`reqwest::Client`]: https://docs.rs/reqwest/*/reqwest/struct.Client.html
[`reqwest::Error`]: https://docs.rs/reqwest/*/reqwest/struct.Error.html
[`reqwest::Result`]:https://docs.rs/reqwest/*/reqwest/type.Result.html

View file

@ -12,19 +12,18 @@ body. [`RequestBuilder::basic_auth`] handles authentication. The call to
[`RequestBuilder::send`] asynchronously executes the requests.
```rust,edition2018,no_run
# use error_chain::error_chain;
use error_chain::error_chain;
use serde::Deserialize;
use serde_json::json;
use std::env;
use reqwest::Client;
#
# error_chain! {
# foreign_links {
# EnvVar(env::VarError);
# HttpRequest(reqwest::Error);
# }
# }
error_chain! {
foreign_links {
EnvVar(env::VarError);
HttpRequest(reqwest::Error);
}
}
#[derive(Deserialize, Debug)]
struct Gist {
@ -32,7 +31,8 @@ struct Gist {
html_url: String,
}
fn main() -> Result<()> {
#[tokio::main]
async fn main() -> Result<()> {
let gh_user = env::var("GH_USER")?;
let gh_pass = env::var("GH_PASS")?;
@ -46,20 +46,20 @@ fn main() -> Result<()> {
}});
let request_url = "https://api.github.com/gists";
let mut response = Client::new()
let response = Client::new()
.post(request_url)
.basic_auth(gh_user.clone(), Some(gh_pass.clone()))
.json(&gist_body)
.send()?;
.send().await?;
let gist: Gist = response.json()?;
let gist: Gist = response.json().await?;
println!("Created {:?}", gist);
let request_url = format!("{}/{}",request_url, gist.id);
let response = Client::new()
.delete(&request_url)
.basic_auth(gh_user, Some(gh_pass))
.send()?;
.send().await?;
println!("Gist {} deleted! Status code: {}",gist.id, response.status());
Ok(())

View file

@ -2,31 +2,31 @@
[![reqwest-badge]][reqwest] [![tempdir-badge]][tempdir] [![cat-net-badge]][cat-net] [![cat-filesystem-badge]][cat-filesystem]
Creates a temporary directory with [`tempfile::Builder`] and synchronously downloads
a file over HTTP using [`reqwest::get`].
Creates a temporary directory with [`tempfile::Builder`] and downloads
a file over HTTP using [`reqwest::get`] asynchronously.
Creates a target [`File`] with name obtained from [`Response::url`] within
[`tempdir()`] and copies downloaded data into it with [`io::copy`].
The temporary directory is automatically removed on `run` function return.
The temporary directory is automatically removed on program exit.
```rust,edition2018,no_run
# use error_chain::error_chain;
use error_chain::error_chain;
use std::io::copy;
use std::fs::File;
use tempfile::Builder;
#
# error_chain! {
# foreign_links {
# Io(std::io::Error);
# HttpRequest(reqwest::Error);
# }
# }
fn main() -> Result<()> {
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(reqwest::Error);
}
}
#[tokio::main]
async fn main() -> Result<()> {
let tmp_dir = Builder::new().prefix("example").tempdir()?;
let target = "https://www.rust-lang.org/logos/rust-logo-512x512.png";
let mut response = reqwest::get(target)?;
let response = reqwest::get(target).await?;
let mut dest = {
let fname = response
@ -41,7 +41,8 @@ fn main() -> Result<()> {
println!("will be located under: '{:?}'", fname);
File::create(fname)?
};
copy(&mut response, &mut dest)?;
let content = response.text().await?;
copy(&mut content.as_bytes(), &mut dest)?;
Ok(())
}
```

View file

@ -2,72 +2,66 @@
[![reqwest-badge]][reqwest] [![cat-net-badge]][cat-net]
Uses [`reqwest::Client::head`] to get the [Content-Length] of the response.
Uses [`reqwest::blocking::Client::head`] to get the [Content-Length] of the response.
The code then uses [`reqwest::Client::get`] to download the content in
chunks of 10240 bytes, while printing progress messages. The [Range] header specifies the chunk size and position.
The code then uses [`reqwest::blocking::Client::get`] to download the content in
chunks of 10240 bytes, while printing progress messages. This example uses the synchronous
reqwest module. The [Range] header specifies the chunk size and position.
The Range header is defined in [RFC7233][HTTP Range RFC7233].
```rust,edition2018,no_run
# use error_chain::error_chain;
use std::fs::File;
use std::str::FromStr;
use error_chain::error_chain;
use reqwest::header::{HeaderValue, CONTENT_LENGTH, RANGE};
use reqwest::StatusCode;
use std::fs::File;
use std::str::FromStr;
#
# error_chain! {
# foreign_links {
# Io(std::io::Error);
# Reqwest(reqwest::Error);
# Header(reqwest::header::ToStrError);
# }
# }
#
# struct PartialRangeIter {
# start: u64,
# end: u64,
# buffer_size: u32,
# }
#
# impl PartialRangeIter {
# pub fn new(start: u64, end: u64, buffer_size: u32) -> Result<Self> {
# if buffer_size == 0 {
# Err("invalid buffer_size, give a value greater than zero.")?;
# }
#
# Ok(PartialRangeIter {
# start,
# end,
# buffer_size,
# })
# }
# }
#
# impl Iterator for PartialRangeIter {
# type Item = HeaderValue;
#
# fn next(&mut self) -> Option<Self::Item> {
# if self.start > self.end {
# None
# } else {
# let prev_start = self.start;
# self.start += std::cmp::min(self.buffer_size as u64, self.end - self.start + 1);
# // NOTE(unwrap): `HeaderValue::from_str` will fail only if the value is not made
# // of visible ASCII characters. Since the format string is static and the two
# // values are integers, that can't happen.
# Some(HeaderValue::from_str(&format!("bytes={}-{}", prev_start, self.start - 1)).unwrap())
# }
# }
# }
error_chain! {
foreign_links {
Io(std::io::Error);
Reqwest(reqwest::Error);
Header(reqwest::header::ToStrError);
}
}
struct PartialRangeIter {
start: u64,
end: u64,
buffer_size: u32,
}
impl PartialRangeIter {
pub fn new(start: u64, end: u64, buffer_size: u32) -> Result<Self> {
if buffer_size == 0 {
Err("invalid buffer_size, give a value greater than zero.")?;
}
Ok(PartialRangeIter {
start,
end,
buffer_size,
})
}
}
impl Iterator for PartialRangeIter {
type Item = HeaderValue;
fn next(&mut self) -> Option<Self::Item> {
if self.start > self.end {
None
} else {
let prev_start = self.start;
self.start += std::cmp::min(self.buffer_size as u64, self.end - self.start + 1);
Some(HeaderValue::from_str(&format!("bytes={}-{}", prev_start, self.start - 1)).expect("string provided by format!"))
}
}
}
fn main() -> Result<()> {
let url = "https://httpbin.org/range/102400?duration=2";
const CHUNK_SIZE: u32 = 10240;
let client = reqwest::Client::new();
let client = reqwest::blocking::Client::new();
let response = client.head(url).send()?;
let length = response
.headers()
@ -86,17 +80,19 @@ fn main() -> Result<()> {
if !(status == StatusCode::OK || status == StatusCode::PARTIAL_CONTENT) {
error_chain::bail!("Unexpected server response: {}", status)
}
std::io::copy(&mut response, &mut output_file)?;
}
let content = response.text()?;
std::io::copy(&mut content.as_bytes(), &mut output_file)?;
println!("Finished with success!");
Ok(())
}
```
[`reqwest::Client::get`]: https://docs.rs/reqwest/*/reqwest/struct.Client.html#method.get
[`reqwest::Client::head`]: https://docs.rs/reqwest/*/reqwest/struct.Client.html#method.head
[`reqwest::blocking::Client::get`]: https://docs.rs/reqwest/*/reqwest/blocking/struct.Client.html#method.get
[`reqwest::blocking::Client::head`]: https://docs.rs/reqwest/*/reqwest/blocking/struct.Client.html#method.head
[Content-Length]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Length
[Range]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Range

View file

@ -10,28 +10,32 @@ the file uploads and the response returns. [`read_to_string`] returns the
response and displays in the console.
```rust,edition2018,no_run
# use error_chain::error_chain;
#
use error_chain::error_chain;
use std::fs::File;
use std::io::Read;
use reqwest::Client;
#
# error_chain! {
# foreign_links {
# HttpRequest(reqwest::Error);
# IoError(::std::io::Error);
# }
# }
fn main() -> Result<()> {
error_chain! {
foreign_links {
HttpRequest(reqwest::Error);
IoError(::std::io::Error);
}
}
#[tokio::main]
async fn main() -> Result<()> {
let paste_api = "https://paste.rs";
let file = File::open("message")?;
let mut file = File::open("message")?;
let mut response = Client::new().post(paste_api).body(file).send()?;
let mut response_body = String::new();
response.read_to_string(&mut response_body)?;
println!("Your paste is located at: {}", response_body);
let mut contents = String::new();
file.read_to_string(&mut contents)?;
let client = reqwest::Client::new();
let res = client.post(paste_api)
.body(contents)
.send()
.await?;
let response_text = res.text().await?;
println!("Your paste is located at: {}",response_text );
Ok(())
}
```

View file

@ -3,24 +3,24 @@
[![reqwest-badge]][reqwest] [![cat-net-badge]][cat-net]
Parses the supplied URL and makes a synchronous HTTP GET request
with [`reqwest::get`]. Prints obtained [`reqwest::Response`]
with [`reqwest::blocking::get`]. Prints obtained [`reqwest::blocking::Response`]
status and headers. Reads HTTP response body into an allocated [`String`]
using [`read_to_string`].
```rust,edition2018,no_run
# use error_chain::error_chain;
```rust,edition2018,no_run
use error_chain::error_chain;
use std::io::Read;
#
# error_chain! {
# foreign_links {
# Io(std::io::Error);
# HttpRequest(reqwest::Error);
# }
# }
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(reqwest::Error);
}
}
fn main() -> Result<()> {
let mut res = reqwest::get("http://httpbin.org/get")?;
let mut res = reqwest::blocking::get("http://httpbin.org/get")?;
let mut body = String::new();
res.read_to_string(&mut body)?;
@ -30,9 +30,47 @@ fn main() -> Result<()> {
Ok(())
}
```
## Async
A similar approach can be used by including the [`tokio`] executor
to make the main function asynchronous, retrieving the same information.
In this example, [`tokio::main`] handles all the heavy executor setup
and allows sequential code to be written without blocking until `.await`.
Uses the asynchronous versions of [reqwest], both [`reqwest::get`] and
[`reqwest::Response`].
```rust,no_run
use error_chain::error_chain;
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(reqwest::Error);
}
}
#[tokio::main]
async fn main() -> Result<()> {
let res = reqwest::get("http://httpbin.org/get").await?;
println!("Status: {}", res.status());
println!("Headers:\n{:#?}", res.headers());
let body = res.text().await?;
println!("Body:\n{}", body);
Ok(())
}
```
[`read_to_string`]: https://doc.rust-lang.org/std/io/trait.Read.html#method.read_to_string
[`reqwest::blocking::get`]: https://docs.rs/reqwest/*/reqwest/blocking/fn.get.html
[`reqwest::blocking::Response`]: https://docs.rs/reqwest/*/reqwest/blocking/struct.Response.html
[`reqwest::get`]: https://docs.rs/reqwest/*/reqwest/fn.get.html
[`reqwest::Response`]: https://docs.rs/reqwest/*/reqwest/struct.Response.html
[`String`]: https://doc.rust-lang.org/std/string/struct.String.html
[`tokio`]: https://docs.rs/crate/tokio/0.2.11
[`tokio::main`]: https://tokio.rs/docs/getting-started/hello-world/#let-s-write-some-code

View file

@ -7,28 +7,27 @@ found in the [Content-Type] header. [`reqwest::header::HeaderMap::get`] retrieve
the header as a [`reqwest::header::HeaderValue`], which can be converted to a
string. The `mime` crate can then parse that, yielding a [`mime::Mime`] value.
The `mime` crate also defines some commonly used MIME types.
The [`mime`] crate also defines some commonly used MIME types.
Note that the [`reqwest::header`] module is exported from the [`http`] crate.
```rust,edition2018,no_run
# use error_chain::error_chain;
use error_chain::error_chain;
use mime::Mime;
use std::str::FromStr;
use reqwest::header::CONTENT_TYPE;
#
# error_chain! {
# foreign_links {
# Reqwest(reqwest::Error);
# Header(reqwest::header::ToStrError);
# Mime(mime::FromStrError);
# }
# }
error_chain! {
foreign_links {
Reqwest(reqwest::Error);
Header(reqwest::header::ToStrError);
Mime(mime::FromStrError);
}
}
fn main() -> Result<()> {
let response = reqwest::get("https://www.rust-lang.org/logos/rust-logo-32x32.png")?;
#[tokio::main]
async fn main() -> Result<()> {
let response = reqwest::get("https://www.rust-lang.org/logos/rust-logo-32x32.png").await?;
let headers = response.headers();
match headers.get(CONTENT_TYPE) {

View file

@ -6,65 +6,68 @@ Call `get_base_url` to retrieve the base URL. If the document has a base tag,
get the href [`attr`] from base tag. [`Position::BeforePath`] of the original
URL acts as a default.
Iterate through links in the document and parse with [`url::ParseOptions`]
and [`Url::parse`]). Makes a request to the links with reqwest and verifies
[`StatusCode`].
Iterates through links in the document and creates a [`tokio::spawn`] task that will
parse an individual link with [`url::ParseOptions`] and [`Url::parse`].
The task makes a request to the links with [reqwest] and verifies
[`StatusCode`]. The program then `await`s the completion of every task before exiting.
```rust,edition2018,no_run
# use error_chain::error_chain;
use std::collections::HashSet;
use url::{Url, Position};
use error_chain::error_chain;
use reqwest::StatusCode;
use select::document::Document;
use select::predicate::Name;
#
# error_chain! {
# foreign_links {
# ReqError(reqwest::Error);
# IoError(std::io::Error);
# UrlParseError(url::ParseError);
# }
# }
use std::collections::HashSet;
use tokio::stream::{self, StreamExt};
use url::{Position, Url};
fn get_base_url(url: &Url, doc: &Document) -> Result<Url> {
error_chain! {
foreign_links {
ReqError(reqwest::Error);
IoError(std::io::Error);
UrlParseError(url::ParseError);
JoinError(tokio::task::JoinError);
}
}
async fn get_base_url(url: &Url, doc: &Document) -> Result<Url> {
let base_tag_href = doc.find(Name("base")).filter_map(|n| n.attr("href")).nth(0);
let base_url = base_tag_href.map_or_else(
|| Url::parse(&url[..Position::BeforePath]),
Url::parse,
)?;
let base_url =
base_tag_href.map_or_else(|| Url::parse(&url[..Position::BeforePath]), Url::parse)?;
Ok(base_url)
}
fn check_link(url: &Url) -> Result<bool> {
let res = reqwest::get(url.as_ref())?;
async fn check_link(url: &Url) -> Result<bool> {
let res = reqwest::get(url.as_ref()).await?;
Ok(res.status() != StatusCode::NOT_FOUND)
}
fn main() -> Result<()> {
#[tokio::main]
async fn main() -> Result<()> {
let url = Url::parse("https://www.rust-lang.org/en-US/")?;
let res = reqwest::get(url.as_ref())?;
let document = Document::from_read(res)?;
let base_url = get_base_url(&url, &document)?;
let res = reqwest::get(url.as_ref()).await?.text().await?;
let document = Document::from(res.as_str());
let base_url = get_base_url(&url, &document).await?;
let base_parser = Url::options().base_url(Some(&base_url));
let links: HashSet<Url> = document
.find(Name("a"))
.filter_map(|n| n.attr("href"))
.filter_map(|link| base_parser.parse(link).ok())
.collect();
let mut tasks = vec![];
links
.iter()
.filter(|link| check_link(link).ok() == Some(false))
.for_each(|x| println!("{} is broken.", x));
for link in links {
tasks.push(tokio::spawn(async move {
if check_link(&link).await.unwrap() {
println!("{} is OK", link);
} else {
println!("{} is Broken", link);
}
}));
}
for task in tasks {
task.await?
}
Ok(())
}
@ -73,5 +76,6 @@ fn main() -> Result<()> {
[`attr`]: https://docs.rs/select/*/select/node/struct.Node.html#method.attr
[`Position::BeforePath`]: https://docs.rs/url/*/url/enum.Position.html#variant.BeforePath
[`StatusCode`]: https://docs.rs/reqwest/*/reqwest/struct.StatusCode.html
[`tokio::spawn`]: https://docs.rs/tokio/*/tokio/fn.spawn.html
[`url::Parse`]: https://docs.rs/url/*/url/struct.Url.html#method.parse
[`url::ParseOptions`]: https://docs.rs/url/*/url/struct.ParseOptions.html

View file

@ -9,28 +9,32 @@ Call [`filter_map`] on the [`Selection`] retrieves URLs
from links that have the "href" [`attr`] (attribute).
```rust,edition2018,no_run
# use error_chain::error_chain;
use error_chain::error_chain;
use select::document::Document;
use select::predicate::Name;
#
# error_chain! {
# foreign_links {
# ReqError(reqwest::Error);
# IoError(std::io::Error);
# }
# }
fn main() -> Result<()> {
let res = reqwest::get("https://www.rust-lang.org/en-US/")?;
error_chain! {
foreign_links {
ReqError(reqwest::Error);
IoError(std::io::Error);
}
}
Document::from_read(res)?
#[tokio::main]
async fn main() -> Result<()> {
let res = reqwest::get("https://www.rust-lang.org/en-US/")
.await?
.text()
.await?;
Document::from(res.as_str())
.find(Name("a"))
.filter_map(|n| n.attr("href"))
.for_each(|x| println!("{}", x));
Ok(())
}
```
[`attr`]: https://docs.rs/select/*/select/node/struct.Node.html#method.attr

View file

@ -9,30 +9,22 @@ look for all entries of internal and external links with
MediaWiki link syntax is described [here][MediaWiki link syntax].
```rust,edition2018,no_run
# use error_chain::error_chain;
use lazy_static::lazy_static;
use std::io::Read;
use std::collections::HashSet;
use std::borrow::Cow;
use regex::Regex;
use std::borrow::Cow;
use std::collections::HashSet;
use std::error::Error;
# error_chain! {
# foreign_links {
# Io(std::io::Error);
# Reqwest(reqwest::Error);
# Regex(regex::Error);
# }
# }
#
fn extract_links(content: &str) -> Result<HashSet<Cow<str>>> {
fn extract_links(content: &str) -> HashSet<Cow<str>> {
lazy_static! {
static ref WIKI_REGEX: Regex =
Regex::new(r"(?x)
static ref WIKI_REGEX: Regex = Regex::new(
r"(?x)
\[\[(?P<internal>[^\[\]|]*)[^\[\]]*\]\] # internal links
|
(url=|URL\||\[)(?P<external>http.*?)[ \|}] # external links
").unwrap();
"
)
.unwrap();
}
let links: HashSet<_> = WIKI_REGEX
@ -44,20 +36,23 @@ fn extract_links(content: &str) -> Result<HashSet<Cow<str>>> {
})
.collect();
Ok(links)
links
}
fn main() -> Result<()> {
let mut content = String::new();
reqwest::get(
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
let content = reqwest::get(
"https://en.wikipedia.org/w/index.php?title=Rust_(programming_language)&action=raw",
)?
.read_to_string(&mut content)?;
)
.await?
.text()
.await?;
println!("{:#?}", extract_links(&content)?);
println!("{:#?}", extract_links(content.as_str()));
Ok(())
}
```
[`Cow`]: https://doc.rust-lang.org/std/borrow/enum.Cow.html