From dd4efa8dcd8e611326caa01c08db8f227aa909d6 Mon Sep 17 00:00:00 2001 From: Andrew Gauger Date: Sat, 6 Jun 2020 19:12:09 -0700 Subject: [PATCH] Async reqwest (#597) * update reqwest and tokio * Make extract link working with new packages * Make uniq working with new packages * Update get.md * Make rest-post working with new packages * Make API resource exists working with new packages * Make Query the GitHub API working with new packages * Fix typos * update to edition 2018 * Extending reqwest * fix mime request * fix post file * Fix download basic * Remove toml lines * add error handling * Fix partial * Improve coding in unique * Borken links fix * Fix retain example * merging master * Completed updating for async * https://docs.rs/reqwest/0.10.6/reqwest/ references clients and they provided 404 errors * replace anyhow with error-chain * spelling and links Co-authored-by: pollosp Co-authored-by: Olopez --- Cargo.toml | 10 +- src/development_tools/errors.md | 1 - src/errors/handle/retain.md | 49 ++++---- src/web.md | 23 +++- src/web/clients.md | 20 ++-- src/web/clients/api/paginated.md | 14 +-- src/web/clients/api/rest-get.md | 13 ++- src/web/clients/api/rest-head.md | 16 ++- src/web/clients/api/rest-post.md | 30 ++--- src/web/clients/download/basic.md | 31 ++--- src/web/clients/download/partial.md | 156 +++++++++++++------------- src/web/clients/download/post-file.md | 38 ++++--- src/web/clients/requests/get.md | 60 ++++++++-- src/web/mime/request.md | 25 ++--- src/web/scraping/broken.md | 98 ++++++++-------- src/web/scraping/extract-links.md | 40 ++++--- src/web/scraping/unique.md | 67 +++++------ 17 files changed, 379 insertions(+), 312 deletions(-) delete mode 100644 src/development_tools/errors.md diff --git a/Cargo.toml b/Cargo.toml index 98a9847..e3f9da1 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,10 @@ [package] name = "rust-cookbook" -version = "0.1.0" -authors = ["Brian Anderson "] +version = "1.0.0" +authors = ["Brian Anderson ", "Andrew Gauger 
"] edition = "2018" license = "MIT/Apache-2.0" publish = false - build = "build.rs" [dependencies] @@ -42,19 +41,20 @@ rand = "0.7.3" rand_distr = "0.2.2" rayon = "1.0" regex = "1.0" -reqwest = "0.9" +reqwest = { version = "0.10", features = ["blocking", "json", "stream"] } ring = "0.16.11" rusqlite = { version = "0.22", features = ["chrono"] } same-file = "1.0" select = "0.4" semver = "0.9" -serde = "1.0" +serde = { version = "1.0", features = ["derive"] } serde_derive = "1.0" serde_json = "1.0" tar = "0.4.12" tempfile = "3.1" threadpool = "1.6" toml = "0.4" +tokio = { version = "0.2", features = ["full"] } unicode-segmentation = "1.2.1" url = "2.1" walkdir = "2.0" diff --git a/src/development_tools/errors.md b/src/development_tools/errors.md deleted file mode 100644 index 4f6c27f..0000000 --- a/src/development_tools/errors.md +++ /dev/null @@ -1 +0,0 @@ -# Error Handling diff --git a/src/errors/handle/retain.md b/src/errors/handle/retain.md index 1ff94c2..d7a6a9f 100644 --- a/src/errors/handle/retain.md +++ b/src/errors/handle/retain.md @@ -6,7 +6,7 @@ The [error-chain] crate makes [matching] on different error types returned by a function possible and relatively compact. [`ErrorKind`] determines the error type. -Uses [reqwest] to query a random integer generator web service. Converts +Uses [reqwest]::[blocking] to query a random integer generator web service. Converts the string response into an integer. The Rust standard library, [reqwest], and the web service can all generate errors. Well defined Rust errors use [`foreign_links`]. An additional [`ErrorKind`] variant for the web service @@ -15,51 +15,46 @@ error uses `errors` block of the `error_chain!` macro. ```rust,edition2018 use error_chain::error_chain; -use std::io::Read; - error_chain! 
{ foreign_links { Io(std::io::Error); Reqwest(reqwest::Error); ParseIntError(std::num::ParseIntError); } - errors { RandomResponseError(t: String) } } -fn parse_response(mut response: reqwest::Response) -> Result { - let mut body = String::new(); - response.read_to_string(&mut body)?; - body.pop(); - body.parse::() - .chain_err(|| ErrorKind::RandomResponseError(body)) +fn parse_response(response: reqwest::blocking::Response) -> Result { + let mut body = response.text()?; + body.pop(); + body + .parse::() + .chain_err(|| ErrorKind::RandomResponseError(body)) } fn run() -> Result<()> { - let url = - format!("https://www.random.org/integers/?num=1&min=0&max=10&col=1&base=10&format=plain"); - let response = reqwest::get(&url)?; - let random_value: u32 = parse_response(response)?; - - println!("a random number between 0 and 10: {}", random_value); - - Ok(()) + let url = + format!("https://www.random.org/integers/?num=1&min=0&max=10&col=1&base=10&format=plain"); + let response = reqwest::blocking::get(&url)?; + let random_value: u32 = parse_response(response)?; + println!("a random number between 0 and 10: {}", random_value); + Ok(()) } fn main() { - if let Err(error) = run() { - match *error.kind() { - ErrorKind::Io(_) => println!("Standard IO error: {:?}", error), - ErrorKind::Reqwest(_) => println!("Reqwest error: {:?}", error), - ErrorKind::ParseIntError(_) => println!("Standard parse int error: {:?}", error), - ErrorKind::RandomResponseError(_) => println!("User defined error: {:?}", error), - _ => println!("Other error: {:?}", error), - } + if let Err(error) = run() { + match *error.kind() { + ErrorKind::Io(_) => println!("Standard IO error: {:?}", error), + ErrorKind::Reqwest(_) => println!("Reqwest error: {:?}", error), + ErrorKind::ParseIntError(_) => println!("Standard parse int error: {:?}", error), + ErrorKind::RandomResponseError(_) => println!("User defined error: {:?}", error), + _ => println!("Other error: {:?}", error), } + } } ``` [`ErrorKind`]: 
https://docs.rs/error-chain/*/error_chain/example_generated/enum.ErrorKind.html [`foreign_links`]: https://docs.rs/error-chain/*/error_chain/#foreign-links - +[blocking]: https://docs.rs/reqwest/*/reqwest/blocking/index.html [Matching]:https://docs.rs/error-chain/*/error_chain/#matching-errors diff --git a/src/web.md b/src/web.md index 2b9a5e6..3bf9314 100644 --- a/src/web.md +++ b/src/web.md @@ -26,8 +26,18 @@ | [Get MIME type from filename][ex-mime-from-filename] | [![mime-badge]][mime] | [![cat-encoding-badge]][cat-encoding] | | [Parse the MIME type of a HTTP response][ex-http-response-mime-type] | [![mime-badge]][mime] [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] [![cat-encoding-badge]][cat-encoding] | +## Clients -{{#include web/clients.md}} +| Recipe | Crates | Categories | +|--------|--------|------------| +| [Make a HTTP GET request][ex-url-basic] | [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] | +| [Query the GitHub API][ex-rest-get] | [![reqwest-badge]][reqwest] [![serde-badge]][serde] | [![cat-net-badge]][cat-net] [![cat-encoding-badge]][cat-encoding] | +| [Check if an API resource exists][ex-rest-head] | [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] | +| [Create and delete Gist with GitHub API][ex-rest-post] | [![reqwest-badge]][reqwest] [![serde-badge]][serde] | [![cat-net-badge]][cat-net] [![cat-encoding-badge]][cat-encoding] | +| [Consume a paginated RESTful API][ex-paginated-api] | [![reqwest-badge]][reqwest] [![serde-badge]][serde] | [![cat-net-badge]][cat-net] [![cat-encoding-badge]][cat-encoding] | +| [Download a file to a temporary directory][ex-url-download] | [![reqwest-badge]][reqwest] [![tempdir-badge]][tempdir] | [![cat-net-badge]][cat-net] [![cat-filesystem-badge]][cat-filesystem] | +| [Make a partial download with HTTP range headers][ex-progress-with-range] | [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] | +| [POST a file to paste-rs][ex-file-post] | [![reqwest-badge]][reqwest] | 
[![cat-net-badge]][cat-net] | [ex-extract-links-webpage]: web/scraping.html#extract-all-links-from-a-webpage-html [ex-check-broken-links]: web/scraping.html#check-a-webpage-for-broken-links @@ -43,4 +53,15 @@ [ex-mime-from-filename]: web/mime.html#get-mime-type-from-filename [ex-http-response-mime-type]: web/mime.html#parse-the-mime-type-of-a-http-response +[ex-url-basic]: web/clients/requests.html#make-a-http-get-request +[ex-rest-custom-params]: web/clients/requests.html#set-custom-headers-and-url-parameters-for-a-rest-request +[ex-rest-get]: web/clients/apis.html#query-the-github-api +[ex-rest-head]: web/clients/apis.html#check-if-an-api-resource-exists +[ex-rest-post]: web/clients/apis.html#create-and-delete-gist-with-github-api +[ex-paginated-api]: web/clients/apis.html#consume-a-paginated-restful-api +[ex-handle-rate-limited-api]: web/clients/apis.html#handle-a-rate-limited-api +[ex-url-download]: web/clients/download.html#download-a-file-to-a-temporary-directory +[ex-progress-with-range]: web/clients/download.html#make-a-partial-download-with-http-range-headers +[ex-file-post]: web/clients/download.html#post-a-file-to-paste-rs + {{#include links.md}} diff --git a/src/web/clients.md b/src/web/clients.md index dc5956b..8e7e478 100644 --- a/src/web/clients.md +++ b/src/web/clients.md @@ -11,15 +11,15 @@ | [Make a partial download with HTTP range headers][ex-progress-with-range] | [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] | | [POST a file to paste-rs][ex-file-post] | [![reqwest-badge]][reqwest] | [![cat-net-badge]][cat-net] | -[ex-url-basic]: web/clients/requests.html#make-a-http-get-request -[ex-rest-custom-params]: web/clients/requests.html#set-custom-headers-and-url-parameters-for-a-rest-request -[ex-rest-get]: web/clients/apis.html#query-the-github-api -[ex-rest-head]: web/clients/apis.html#check-if-an-api-resource-exists -[ex-rest-post]: web/clients/apis.html#create-and-delete-gist-with-github-api -[ex-paginated-api]: 
web/clients/apis.html#consume-a-paginated-restful-api -[ex-handle-rate-limited-api]: web/clients/apis.html#handle-a-rate-limited-api -[ex-url-download]: web/clients/download.html#download-a-file-to-a-temporary-directory -[ex-progress-with-range]: web/clients/download.html#make-a-partial-download-with-http-range-headers -[ex-file-post]: web/clients/download.html#post-a-file-to-paste-rs +[ex-url-basic]: clients/requests.html#make-a-http-get-request +[ex-rest-custom-params]: clients/requests.html#set-custom-headers-and-url-parameters-for-a-rest-request +[ex-rest-get]: clients/apis.html#query-the-github-api +[ex-rest-head]: clients/apis.html#check-if-an-api-resource-exists +[ex-rest-post]: clients/apis.html#create-and-delete-gist-with-github-api +[ex-paginated-api]: clients/apis.html#consume-a-paginated-restful-api +[ex-handle-rate-limited-api]: clients/apis.html#handle-a-rate-limited-api +[ex-url-download]: clients/download.html#download-a-file-to-a-temporary-directory +[ex-progress-with-range]: clients/download.html#make-a-partial-download-with-http-range-headers +[ex-file-post]: clients/download.html#post-a-file-to-paste-rs {{#include ../links.md}} diff --git a/src/web/clients/api/paginated.md b/src/web/clients/api/paginated.md index 55d46c4..5780c6d 100644 --- a/src/web/clients/api/paginated.md +++ b/src/web/clients/api/paginated.md @@ -7,8 +7,8 @@ fetches the next page of results from the remote server as it arrives at the end of each page. 
```rust,edition2018,no_run +use reqwest::Result; use serde::Deserialize; -use reqwest::Error; #[derive(Deserialize)] struct ApiResponse { @@ -29,25 +29,25 @@ struct Meta { struct ReverseDependencies { crate_id: String, dependencies: as IntoIterator>::IntoIter, - client: reqwest::Client, + client: reqwest::blocking::Client, page: u32, per_page: u32, total: u32, } impl ReverseDependencies { - fn of(crate_id: &str) -> Result { + fn of(crate_id: &str) -> Result { Ok(ReverseDependencies { crate_id: crate_id.to_owned(), dependencies: vec![].into_iter(), - client: reqwest::Client::new(), + client: reqwest::blocking::Client::new(), page: 0, per_page: 100, total: 0, }) } - fn try_next(&mut self) -> Result, Error> { + fn try_next(&mut self) -> Result> { if let Some(dep) = self.dependencies.next() { return Ok(Some(dep)); } @@ -70,7 +70,7 @@ impl ReverseDependencies { } impl Iterator for ReverseDependencies { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { match self.try_next() { @@ -81,7 +81,7 @@ impl Iterator for ReverseDependencies { } } -fn main() -> Result<(), Error> { +fn main() -> Result<()> { for dep in ReverseDependencies::of("serde")? { println!("reverse dependency: {}", dep?.crate_id); } diff --git a/src/web/clients/api/rest-get.md b/src/web/clients/api/rest-get.md index cc7ac1b..c1d0bf2 100644 --- a/src/web/clients/api/rest-get.md +++ b/src/web/clients/api/rest-get.md @@ -3,7 +3,11 @@ [![reqwest-badge]][reqwest] [![serde-badge]][serde] [![cat-net-badge]][cat-net] [![cat-encoding-badge]][cat-encoding] Queries GitHub [stargazers API v3](https://developer.github.com/v3/activity/starring/#list-stargazers) -with [`reqwest::get`] to get list of all users who have marked a GitHub project with a star. [`reqwest::Response`] is deserialized with [`Response::json`] into `User` objects implementing [`serde::Deserialize`]. +with [`reqwest::get`] to get list of all users who have marked a GitHub project with a star. 
+[`reqwest::Response`] is deserialized with [`Response::json`] into `User` objects implementing [`serde::Deserialize`]. + +[tokio::main] is used to set up the async executor and the process waits for [`reqwest::get`] to complete before +processing the response into User instances. ```rust,edition2018,no_run use serde::Deserialize; @@ -15,14 +19,15 @@ struct User { id: u32, } -fn main() -> Result<(), Error> { +#[tokio::main] +async fn main() -> Result<(), Error> { let request_url = format!("https://api.github.com/repos/{owner}/{repo}/stargazers", owner = "rust-lang-nursery", repo = "rust-cookbook"); println!("{}", request_url); - let mut response = reqwest::get(&request_url)?; + let response = reqwest::get(&request_url).await?; - let users: Vec = response.json()?; + let users: Vec = response.json().await?; println!("{:?}", users); Ok(()) } diff --git a/src/web/clients/api/rest-head.md b/src/web/clients/api/rest-head.md index 04de301..53a6c98 100644 --- a/src/web/clients/api/rest-head.md +++ b/src/web/clients/api/rest-head.md @@ -8,21 +8,23 @@ success. This is a quick way to query a rest resource without needing to receive a body. [`reqwest::Client`] cofigured with [`ClientBuilder::timeout`] ensures a request will not last longer than a timeout. -```rust,edition2018,no_run +Due to both [`ClientBuilder::build`] and [`RequestBuilder::send`] returning [`reqwest::Error`] +types, the shortcut [`reqwest::Result`] is used for the main function return type. 
-use reqwest::Error; +```rust,edition2018,no_run +use reqwest::Result; use std::time::Duration; use reqwest::ClientBuilder; - -fn main() -> Result<(), Error> { +#[tokio::main] +async fn main() -> Result<()> { let user = "ferris-the-crab"; let request_url = format!("https://api.github.com/users/{}", user); println!("{}", request_url); let timeout = Duration::new(5, 0); let client = ClientBuilder::new().timeout(timeout).build()?; - let response = client.head(&request_url).send()?; + let response = client.head(&request_url).send().await?; if response.status().is_success() { println!("{} is a user!", user); @@ -34,6 +36,10 @@ fn main() -> Result<(), Error> { } ``` +[`ClientBuilder::build`]: https://docs.rs/reqwest/*/reqwest/struct.ClientBuilder.html#method.build [`Client::head`]: https://docs.rs/reqwest/*/reqwest/struct.Client.html#method.head [`ClientBuilder::timeout`]: https://docs.rs/reqwest/*/reqwest/struct.ClientBuilder.html#method.timeout +[`RequestBuilder::send`]: https://docs.rs/reqwest/*/reqwest/struct.RequestBuilder.html#method.send [`reqwest::Client`]: https://docs.rs/reqwest/*/reqwest/struct.Client.html +[`reqwest::Error`]: https://docs.rs/reqwest/*/reqwest/struct.Error.html +[`reqwest::Result`]:https://docs.rs/reqwest/*/reqwest/type.Result.html \ No newline at end of file diff --git a/src/web/clients/api/rest-post.md b/src/web/clients/api/rest-post.md index 1782201..bb5eb85 100644 --- a/src/web/clients/api/rest-post.md +++ b/src/web/clients/api/rest-post.md @@ -12,19 +12,18 @@ body. [`RequestBuilder::basic_auth`] handles authentication. The call to [`RequestBuilder::send`] synchronously executes the requests. ```rust,edition2018,no_run -# use error_chain::error_chain; +use error_chain::error_chain; use serde::Deserialize; - use serde_json::json; - +use serde_json::json; use std::env; use reqwest::Client; -# -# error_chain! { -# foreign_links { -# EnvVar(env::VarError); -# HttpRequest(reqwest::Error); -# } -# } + +error_chain! 
{ + foreign_links { + EnvVar(env::VarError); + HttpRequest(reqwest::Error); + } +} #[derive(Deserialize, Debug)] struct Gist { @@ -32,7 +31,8 @@ struct Gist { html_url: String, } -fn main() -> Result<()> { +#[tokio::main] +async fn main() -> Result<()> { let gh_user = env::var("GH_USER")?; let gh_pass = env::var("GH_PASS")?; @@ -46,20 +46,20 @@ fn main() -> Result<()> { }}); let request_url = "https://api.github.com/gists"; - let mut response = Client::new() + let response = Client::new() .post(request_url) .basic_auth(gh_user.clone(), Some(gh_pass.clone())) .json(&gist_body) - .send()?; + .send().await?; - let gist: Gist = response.json()?; + let gist: Gist = response.json().await?; println!("Created {:?}", gist); let request_url = format!("{}/{}",request_url, gist.id); let response = Client::new() .delete(&request_url) .basic_auth(gh_user, Some(gh_pass)) - .send()?; + .send().await?; println!("Gist {} deleted! Status code: {}",gist.id, response.status()); Ok(()) diff --git a/src/web/clients/download/basic.md b/src/web/clients/download/basic.md index 041a87f..63168ca 100755 --- a/src/web/clients/download/basic.md +++ b/src/web/clients/download/basic.md @@ -2,31 +2,31 @@ [![reqwest-badge]][reqwest] [![tempdir-badge]][tempdir] [![cat-net-badge]][cat-net] [![cat-filesystem-badge]][cat-filesystem] -Creates a temporary directory with [`tempfile::Builder`] and synchronously downloads -a file over HTTP using [`reqwest::get`]. +Creates a temporary directory with [`tempfile::Builder`] and downloads +a file over HTTP using [`reqwest::get`] asynchronously. Creates a target [`File`] with name obtained from [`Response::url`] within [`tempdir()`] and copies downloaded data into it with [`io::copy`]. -The temporary directory is automatically removed on `run` function return. +The temporary directory is automatically removed on program exit. 
```rust,edition2018,no_run -# use error_chain::error_chain; - +use error_chain::error_chain; use std::io::copy; use std::fs::File; use tempfile::Builder; -# -# error_chain! { -# foreign_links { -# Io(std::io::Error); -# HttpRequest(reqwest::Error); -# } -# } -fn main() -> Result<()> { +error_chain! { + foreign_links { + Io(std::io::Error); + HttpRequest(reqwest::Error); + } +} + +#[tokio::main] +async fn main() -> Result<()> { let tmp_dir = Builder::new().prefix("example").tempdir()?; let target = "https://www.rust-lang.org/logos/rust-logo-512x512.png"; - let mut response = reqwest::get(target)?; + let response = reqwest::get(target).await?; let mut dest = { let fname = response @@ -41,7 +41,8 @@ fn main() -> Result<()> { println!("will be located under: '{:?}'", fname); File::create(fname)? }; - copy(&mut response, &mut dest)?; + let content = response.text().await?; + copy(&mut content.as_bytes(), &mut dest)?; Ok(()) } ``` diff --git a/src/web/clients/download/partial.md b/src/web/clients/download/partial.md index 980b8e6..d288573 100644 --- a/src/web/clients/download/partial.md +++ b/src/web/clients/download/partial.md @@ -2,101 +2,97 @@ [![reqwest-badge]][reqwest] [![cat-net-badge]][cat-net] -Uses [`reqwest::Client::head`] to get the [Content-Length] of the response. +Uses [`reqwest::blocking::Client::head`] to get the [Content-Length] of the response. -The code then uses [`reqwest::Client::get`] to download the content in -chunks of 10240 bytes, while printing progress messages. The [Range] header specifies the chunk size and position. +The code then uses [`reqwest::blocking::Client::get`] to download the content in +chunks of 10240 bytes, while printing progress messages. This example uses the synchronous +reqwest module. The [Range] header specifies the chunk size and position. The Range header is defined in [RFC7233][HTTP Range RFC7233]. 
```rust,edition2018,no_run -# use error_chain::error_chain; - -use std::fs::File; -use std::str::FromStr; +use error_chain::error_chain; use reqwest::header::{HeaderValue, CONTENT_LENGTH, RANGE}; use reqwest::StatusCode; +use std::fs::File; +use std::str::FromStr; -# -# error_chain! { -# foreign_links { -# Io(std::io::Error); -# Reqwest(reqwest::Error); -# Header(reqwest::header::ToStrError); -# } -# } -# -# struct PartialRangeIter { -# start: u64, -# end: u64, -# buffer_size: u32, -# } -# -# impl PartialRangeIter { -# pub fn new(start: u64, end: u64, buffer_size: u32) -> Result { -# if buffer_size == 0 { -# Err("invalid buffer_size, give a value greater than zero.")?; -# } -# -# Ok(PartialRangeIter { -# start, -# end, -# buffer_size, -# }) -# } -# } -# -# impl Iterator for PartialRangeIter { -# type Item = HeaderValue; -# -# fn next(&mut self) -> Option { -# if self.start > self.end { -# None -# } else { -# let prev_start = self.start; -# self.start += std::cmp::min(self.buffer_size as u64, self.end - self.start + 1); -# // NOTE(unwrap): `HeaderValue::from_str` will fail only if the value is not made -# // of visible ASCII characters. Since the format string is static and the two -# // values are integers, that can't happen. -# Some(HeaderValue::from_str(&format!("bytes={}-{}", prev_start, self.start - 1)).unwrap()) -# } -# } -# } +error_chain! 
{ + foreign_links { + Io(std::io::Error); + Reqwest(reqwest::Error); + Header(reqwest::header::ToStrError); + } +} + +struct PartialRangeIter { + start: u64, + end: u64, + buffer_size: u32, +} + +impl PartialRangeIter { + pub fn new(start: u64, end: u64, buffer_size: u32) -> Result { + if buffer_size == 0 { + Err("invalid buffer_size, give a value greater than zero.")?; + } + Ok(PartialRangeIter { + start, + end, + buffer_size, + }) + } +} + +impl Iterator for PartialRangeIter { + type Item = HeaderValue; + fn next(&mut self) -> Option { + if self.start > self.end { + None + } else { + let prev_start = self.start; + self.start += std::cmp::min(self.buffer_size as u64, self.end - self.start + 1); + Some(HeaderValue::from_str(&format!("bytes={}-{}", prev_start, self.start - 1)).expect("string provided by format!")) + } + } +} fn main() -> Result<()> { - let url = "https://httpbin.org/range/102400?duration=2"; - const CHUNK_SIZE: u32 = 10240; - - let client = reqwest::Client::new(); - let response = client.head(url).send()?; - let length = response - .headers() - .get(CONTENT_LENGTH) - .ok_or("response doesn't include the content length")?; - let length = u64::from_str(length.to_str()?).map_err(|_| "invalid Content-Length header")?; - - let mut output_file = File::create("download.bin")?; - - println!("starting download..."); - for range in PartialRangeIter::new(0, length - 1, CHUNK_SIZE)? 
{ - println!("range {:?}", range); - let mut response = client.get(url).header(RANGE, range).send()?; - - let status = response.status(); - if !(status == StatusCode::OK || status == StatusCode::PARTIAL_CONTENT) { - error_chain::bail!("Unexpected server response: {}", status) - } - - std::io::copy(&mut response, &mut output_file)?; + let url = "https://httpbin.org/range/102400?duration=2"; + const CHUNK_SIZE: u32 = 10240; + + let client = reqwest::blocking::Client::new(); + let response = client.head(url).send()?; + let length = response + .headers() + .get(CONTENT_LENGTH) + .ok_or("response doesn't include the content length")?; + let length = u64::from_str(length.to_str()?).map_err(|_| "invalid Content-Length header")?; + + let mut output_file = File::create("download.bin")?; + + println!("starting download..."); + for range in PartialRangeIter::new(0, length - 1, CHUNK_SIZE)? { + println!("range {:?}", range); + let mut response = client.get(url).header(RANGE, range).send()?; + + let status = response.status(); + if !(status == StatusCode::OK || status == StatusCode::PARTIAL_CONTENT) { + error_chain::bail!("Unexpected server response: {}", status) } + std::io::copy(&mut response, &mut output_file)?; + } + + let content = response.text()?; + std::io::copy(&mut content.as_bytes(), &mut output_file)?; - println!("Finished with success!"); - Ok(()) + println!("Finished with success!"); + Ok(()) } ``` -[`reqwest::Client::get`]: https://docs.rs/reqwest/*/reqwest/struct.Client.html#method.get -[`reqwest::Client::head`]: https://docs.rs/reqwest/*/reqwest/struct.Client.html#method.head +[`reqwest::blocking::Client::get`]: https://docs.rs/reqwest/*/reqwest/blocking/struct.Client.html#method.get +[`reqwest::blocking::Client::head`]: https://docs.rs/reqwest/*/reqwest/blocking/struct.Client.html#method.head [Content-Length]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Length [Range]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Range diff 
--git a/src/web/clients/download/post-file.md b/src/web/clients/download/post-file.md index 3800542..79b9363 100644 --- a/src/web/clients/download/post-file.md +++ b/src/web/clients/download/post-file.md @@ -10,28 +10,32 @@ the file uploads and the response returns. [`read_to_string`] returns the response and displays in the console. ```rust,edition2018,no_run - -# use error_chain::error_chain; -# +use error_chain::error_chain; use std::fs::File; use std::io::Read; -use reqwest::Client; -# -# error_chain! { -# foreign_links { -# HttpRequest(reqwest::Error); -# IoError(::std::io::Error); -# } -# } -fn main() -> Result<()> { + error_chain! { + foreign_links { + HttpRequest(reqwest::Error); + IoError(::std::io::Error); + } + } + #[tokio::main] + +async fn main() -> Result<()> { let paste_api = "https://paste.rs"; - let file = File::open("message")?; + let mut file = File::open("message")?; - let mut response = Client::new().post(paste_api).body(file).send()?; - let mut response_body = String::new(); - response.read_to_string(&mut response_body)?; - println!("Your paste is located at: {}", response_body); + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + + let client = reqwest::Client::new(); + let res = client.post(paste_api) + .body(contents) + .send() + .await?; + let response_text = res.text().await?; + println!("Your paste is located at: {}",response_text ); Ok(()) } ``` diff --git a/src/web/clients/requests/get.md b/src/web/clients/requests/get.md index a262d23..8e810fa 100644 --- a/src/web/clients/requests/get.md +++ b/src/web/clients/requests/get.md @@ -3,24 +3,24 @@ [![reqwest-badge]][reqwest] [![cat-net-badge]][cat-net] Parses the supplied URL and makes a synchronous HTTP GET request -with [`reqwest::get`]. Prints obtained [`reqwest::Response`] +with [`reqwest::blocking::get`]. Prints obtained [`reqwest::blocking::Response`] status and headers. Reads HTTP response body into an allocated [`String`] using [`read_to_string`]. 
-```rust,edition2018,no_run -# use error_chain::error_chain; +```rust,edition2018,no_run +use error_chain::error_chain; use std::io::Read; -# -# error_chain! { -# foreign_links { -# Io(std::io::Error); -# HttpRequest(reqwest::Error); -# } -# } + +error_chain! { + foreign_links { + Io(std::io::Error); + HttpRequest(reqwest::Error); + } +} fn main() -> Result<()> { - let mut res = reqwest::get("http://httpbin.org/get")?; + let mut res = reqwest::blocking::get("http://httpbin.org/get")?; let mut body = String::new(); res.read_to_string(&mut body)?; @@ -30,9 +30,47 @@ fn main() -> Result<()> { Ok(()) } + +``` + +## Async + +A similar approach can be used by including the [`tokio`] executor +to make the main function asynchronous, retrieving the same information. + +In this example, [`tokio::main`] handles all the heavy executor setup +and allows sequential code implemented without blocking until `.await`. + +Uses the asynchronous versions of [reqwest], both [`reqwest::get`] and +[`reqwest::Response`]. + +```rust,no_run +use error_chain::error_chain; + +error_chain! 
{ + foreign_links { + Io(std::io::Error); + HttpRequest(reqwest::Error); + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let res = reqwest::get("http://httpbin.org/get").await?; + println!("Status: {}", res.status()); + println!("Headers:\n{:#?}", res.headers()); + + let body = res.text().await?; + println!("Body:\n{}", body); + Ok(()) +} ``` [`read_to_string`]: https://doc.rust-lang.org/std/io/trait.Read.html#method.read_to_string +[`reqwest::blocking::get`]: https://docs.rs/reqwest/*/reqwest/blocking/fn.get.html +[`reqwest::blocking::Response`]: https://docs.rs/reqwest/*/reqwest/blocking/struct.Response.html [`reqwest::get`]: https://docs.rs/reqwest/*/reqwest/fn.get.html [`reqwest::Response`]: https://docs.rs/reqwest/*/reqwest/struct.Response.html [`String`]: https://doc.rust-lang.org/std/string/struct.String.html +[`tokio`]: https://docs.rs/crate/tokio/0.2.11 +[`tokio::main`]: https://tokio.rs/docs/getting-started/hello-world/#let-s-write-some-code \ No newline at end of file diff --git a/src/web/mime/request.md b/src/web/mime/request.md index 5f53838..b66c5de 100644 --- a/src/web/mime/request.md +++ b/src/web/mime/request.md @@ -7,28 +7,27 @@ found in the [Content-Type] header. [`reqwest::header::HeaderMap::get`] retrieve the header as a [`reqwest::header::HeaderValue`], which can be converted to a string. The `mime` crate can then parse that, yielding a [`mime::Mime`] value. -The `mime` crate also defines some commonly used MIME types. +The [`mime`] crate also defines some commonly used MIME types. Note that the [`reqwest::header`] module is exported from the [`http`] crate. ```rust,edition2018,no_run -# use error_chain::error_chain; - +use error_chain::error_chain; use mime::Mime; use std::str::FromStr; use reqwest::header::CONTENT_TYPE; -# -# error_chain! { -# foreign_links { -# Reqwest(reqwest::Error); -# Header(reqwest::header::ToStrError); -# Mime(mime::FromStrError); -# } -# } + error_chain! 
{ + foreign_links { + Reqwest(reqwest::Error); + Header(reqwest::header::ToStrError); + Mime(mime::FromStrError); + } + } -fn main() -> Result<()> { - let response = reqwest::get("https://www.rust-lang.org/logos/rust-logo-32x32.png")?; +#[tokio::main] +async fn main() -> Result<()> { + let response = reqwest::get("https://www.rust-lang.org/logos/rust-logo-32x32.png").await?; let headers = response.headers(); match headers.get(CONTENT_TYPE) { diff --git a/src/web/scraping/broken.md b/src/web/scraping/broken.md index d813dce..97da80a 100644 --- a/src/web/scraping/broken.md +++ b/src/web/scraping/broken.md @@ -6,72 +6,76 @@ Call `get_base_url` to retrieve the base URL. If the document has a base tag, get the href [`attr`] from base tag. [`Position::BeforePath`] of the original URL acts as a default. -Iterate through links in the document and parse with [`url::ParseOptions`] -and [`Url::parse`]). Makes a request to the links with reqwest and verifies -[`StatusCode`]. +Iterates through links in the document and creates a [`tokio::spawn`] task that will +parse an individual link with [`url::ParseOptions`] and [`Url::parse`]). +The task makes a request to the links with [reqwest] and verifies +[`StatusCode`]. Then the tasks `await` completion before ending the program. ```rust,edition2018,no_run -# use error_chain::error_chain; - -use std::collections::HashSet; - -use url::{Url, Position}; +use error_chain::error_chain; use reqwest::StatusCode; use select::document::Document; use select::predicate::Name; -# -# error_chain! 
{ -# foreign_links { -# ReqError(reqwest::Error); -# IoError(std::io::Error); -# UrlParseError(url::ParseError); -# } -# } +use std::collections::HashSet; +use tokio::stream::{self, StreamExt}; +use url::{Position, Url}; -fn get_base_url(url: &Url, doc: &Document) -> Result { - let base_tag_href = doc.find(Name("base")).filter_map(|n| n.attr("href")).nth(0); - - let base_url = base_tag_href.map_or_else( - || Url::parse(&url[..Position::BeforePath]), - Url::parse, - )?; - - Ok(base_url) +error_chain! { + foreign_links { + ReqError(reqwest::Error); + IoError(std::io::Error); + UrlParseError(url::ParseError); + JoinError(tokio::task::JoinError); + } } -fn check_link(url: &Url) -> Result { - let res = reqwest::get(url.as_ref())?; - - Ok(res.status() != StatusCode::NOT_FOUND) +async fn get_base_url(url: &Url, doc: &Document) -> Result { + let base_tag_href = doc.find(Name("base")).filter_map(|n| n.attr("href")).nth(0); + let base_url = + base_tag_href.map_or_else(|| Url::parse(&url[..Position::BeforePath]), Url::parse)?; + Ok(base_url) } -fn main() -> Result<()> { - let url = Url::parse("https://www.rust-lang.org/en-US/")?; +async fn check_link(url: &Url) -> Result { + let res = reqwest::get(url.as_ref()).await?; + Ok(res.status() != StatusCode::NOT_FOUND) +} - let res = reqwest::get(url.as_ref())?; - let document = Document::from_read(res)?; +#[tokio::main] +async fn main() -> Result<()> { + let url = Url::parse("https://www.rust-lang.org/en-US/")?; + let res = reqwest::get(url.as_ref()).await?.text().await?; + let document = Document::from(res.as_str()); + let base_url = get_base_url(&url, &document).await?; + let base_parser = Url::options().base_url(Some(&base_url)); + let links: HashSet = document + .find(Name("a")) + .filter_map(|n| n.attr("href")) + .filter_map(|link| base_parser.parse(link).ok()) + .collect(); + let mut tasks = vec![]; - let base_url = get_base_url(&url, &document)?; + for link in links { + tasks.push(tokio::spawn(async move { + if 
check_link(&link).await.unwrap() { + println!("{} is OK", link); + } else { + println!("{} is Broken", link); + } + })); + } - let base_parser = Url::options().base_url(Some(&base_url)); + for task in tasks { + task.await? + } - let links: HashSet<Url> = document - .find(Name("a")) - .filter_map(|n| n.attr("href")) - .filter_map(|link| base_parser.parse(link).ok()) - .collect(); - - links - .iter() - .filter(|link| check_link(link).ok() == Some(false)) - .for_each(|x| println!("{} is broken.", x)); - - Ok(()) + Ok(()) } ``` [`attr`]: https://docs.rs/select/*/select/node/struct.Node.html#method.attr [`Position::BeforePath`]: https://docs.rs/url/*/url/enum.Position.html#variant.BeforePath [`StatusCode`]: https://docs.rs/reqwest/*/reqwest/struct.StatusCode.html +[`tokio::spawn`]: https://docs.rs/tokio/*/tokio/fn.spawn.html [`url::Parse`]: https://docs.rs/url/*/url/struct.Url.html#method.parse [`url::ParseOptions`]: https://docs.rs/url/*/url/struct.ParseOptions.html diff --git a/src/web/scraping/extract-links.md b/src/web/scraping/extract-links.md index 2d9b567..ba3a5ba 100644 --- a/src/web/scraping/extract-links.md +++ b/src/web/scraping/extract-links.md @@ -9,28 +9,32 @@ Call [`filter_map`] on the [`Selection`] retrieves URLs from links that have the "href" [`attr`] (attribute). ```rust,edition2018,no_run -# use error_chain::error_chain; - +use error_chain::error_chain; use select::document::Document; use select::predicate::Name; -# -# error_chain! { -# foreign_links { -# ReqError(reqwest::Error); -# IoError(std::io::Error); -# } -# } -fn main() -> Result<()> { - let res = reqwest::get("https://www.rust-lang.org/en-US/")?; - - Document::from_read(res)? - .find(Name("a")) - .filter_map(|n| n.attr("href")) - .for_each(|x| println!("{}", x)); - - Ok(()) +error_chain! { + foreign_links { + ReqError(reqwest::Error); + IoError(std::io::Error); + } } + +#[tokio::main] +async fn main() -> Result<()> { + let res = reqwest::get("https://www.rust-lang.org/en-US/") + .await?
+ .text() + .await?; + + Document::from(res.as_str()) + .find(Name("a")) + .filter_map(|n| n.attr("href")) + .for_each(|x| println!("{}", x)); + + Ok(()) +} + ``` [`attr`]: https://docs.rs/select/*/select/node/struct.Node.html#method.attr diff --git a/src/web/scraping/unique.md b/src/web/scraping/unique.md index efae7f1..9bf8344 100644 --- a/src/web/scraping/unique.md +++ b/src/web/scraping/unique.md @@ -9,55 +9,50 @@ look for all entries of internal and external links with MediaWiki link syntax is described [here][MediaWiki link syntax]. ```rust,edition2018,no_run -# use error_chain::error_chain; use lazy_static::lazy_static; - -use std::io::Read; -use std::collections::HashSet; -use std::borrow::Cow; use regex::Regex; +use std::borrow::Cow; +use std::collections::HashSet; +use std::error::Error; -# error_chain! { -# foreign_links { -# Io(std::io::Error); -# Reqwest(reqwest::Error); -# Regex(regex::Error); -# } -# } -# -fn extract_links(content: &str) -> Result<HashSet<Cow<str>>> { - lazy_static! { - static ref WIKI_REGEX: Regex = - Regex::new(r"(?x) +fn extract_links(content: &str) -> HashSet<Cow<str>> { + lazy_static!
{ + static ref WIKI_REGEX: Regex = Regex::new( + r"(?x) \[\[(?P<internal>[^\[\]|]*)[^\[\]]*\]\] # internal links | (url=|URL\||\[)(?P<external>http.*?)[ \|}] # external links - ").unwrap(); - } + " + ) + .unwrap(); + } - let links: HashSet<_> = WIKI_REGEX - .captures_iter(content) - .map(|c| match (c.name("internal"), c.name("external")) { - (Some(val), None) => Cow::from(val.as_str().to_lowercase()), - (None, Some(val)) => Cow::from(val.as_str()), - _ => unreachable!(), - }) - .collect(); + let links: HashSet<_> = WIKI_REGEX + .captures_iter(content) + .map(|c| match (c.name("internal"), c.name("external")) { + (Some(val), None) => Cow::from(val.as_str().to_lowercase()), + (None, Some(val)) => Cow::from(val.as_str()), + _ => unreachable!(), + }) + .collect(); - Ok(links) + links } -fn main() -> Result<()> { - let mut content = String::new(); - reqwest::get( - "https://en.wikipedia.org/w/index.php?title=Rust_(programming_language)&action=raw", - )? - .read_to_string(&mut content)?; +#[tokio::main] +async fn main() -> Result<(), Box<dyn Error>> { + let content = reqwest::get( + "https://en.wikipedia.org/w/index.php?title=Rust_(programming_language)&action=raw", + ) + .await? + .text() + .await?; - println!("{:#?}", extract_links(&content)?); + println!("{:#?}", extract_links(content.as_str())); - Ok(()) + Ok(()) } + ``` [`Cow`]: https://doc.rust-lang.org/std/borrow/enum.Cow.html