1. 提取链接
需要安装error-chain
库、reqwest
库、select
库、url
库以及tokio
库,可通过以下命令安装
cargo add error-chain
cargo add select
cargo add tokio --features full
cargo add reqwest
cargo add url
[dependencies]
error-chain = "0.12.4"
reqwest = "0.11.17"
select = "0.6.0"
tokio = { version = "1.28.0", features = ["full"] }
url = "2.3.1"
1.1 从 HTML 网页中提取所有链接
使用 reqwest::get
执行 HTTP GET
请求,然后使用 Document::from
将响应信息解析为 HTML
文档。以“a
”(锚元素)作为结构体 Name
的参数,将结构体 Name
作为条件,使用 find
方法检索所有链接。在结构体 Selection
上调用 filter_map
方法,从具有 “href
” attr(属性)的链接检索所有 URL
。
use error_chain::error_chain;
use select::document::Document;
use select::predicate::Name;

error_chain! {
    foreign_links {
        ReqError(reqwest::Error);
        IoError(std::io::Error);
    }
}

/// Downloads the Rust home page and prints the `href` attribute of every
/// anchor (`<a>`) element found in it, one per line.
#[tokio::main]
async fn main() -> Result<()> {
    let response = reqwest::get("https://www.rust-lang.org/en-US/").await?;
    let html = response.text().await?;
    let document = Document::from(html.as_str());

    for anchor in document.find(Name("a")) {
        // Anchors without an `href` attribute are skipped silently.
        if let Some(href) = anchor.attr("href") {
            println!("{}", href);
        }
    }

    Ok(())
}
/ /tools/install /learn ... https://github.com/rust-lang/www.rust-lang.org/issues/new/choose https://prev.rust-lang.org
1.2 检查网页死链
调用 get_base_url
方法检索 base URL
,如果 HTML
文档有 base
标签,从 base
标记获取 href attr
,初始 URL 的默认值是 Position::BeforePath
。遍历 HTML
文档中的链接,并创建一个 tokio::spawn
任务,该任务将使用 url::ParseOptions
结构体和 Url::parse
方法解析单个链接。任务执行中,使用 reqwest
向链接发起请求,并验证状态码结构体 StatusCode
。实例中使用 await
异步等待任务完成,然后结束程序。
use error_chain::error_chain;use reqwest::StatusCode;use select::document::Document;use select::predicate::Name;use std::collections::HashSet;use url::{Position, Url};error_chain! { foreign_links { ReqError (reqwest::Error); IoError (std::io::Error); UrlParseError (url::ParseError); JoinError (tokio::task::JoinError); } } async fn get_base_url (url: &Url, doc: &Document) -> Result <Url> { let base_tag_href = doc.find (Name ("base" )).filter_map (|n| n.attr ("href" )).nth (0 ); let base_url = base_tag_href.map_or_else (|| Url::parse (&url[..Position::BeforePath]), Url::parse)?; Ok (base_url) } async fn check_link (url: &Url) -> Result <bool > { let res = reqwest::get (url.as_ref ()).await ?; Ok (res.status () != StatusCode::NOT_FOUND) } #[tokio::main] async fn main () -> Result <()> { let url = Url::parse ("https://www.taissan.com/" )?; let res = reqwest::get (url.as_ref ()).await ?.text ().await ?; let document = Document::from (res.as_str ()); let base_url = get_base_url (&url, &document).await ?; let base_parser = Url::options ().base_url (Some (&base_url)); let links : HashSet<Url> = document .find (Name ("a" )) .filter_map (|n| n.attr ("href" )) .filter (|link| !link.starts_with ("javascript:void(0);" )) .filter_map (|link| base_parser.parse (link).ok ()) .collect (); let mut tasks = vec! []; for link in links { tasks.push (tokio::spawn (async move { if check_link (&link).await .unwrap () { println! ("{} 正常" , link); } else { println! ("{} 是死链" , link); } })); } for task in tasks { task.await ? } Ok (()) }
https://www.taissan.com/tags/minikube/ 正常 https://www.taissan.com/music/ 是死链 https://www.taissan.com/movies/ 是死链 https://www.sino-lang.org/ 正常 https://www.taissan.com/tags/python/ 正常 ...
1.3 从 WikiPedia
标记页面提取所有唯一性链接
需要安装regex
库和lazy_static
库,可通过cargo add regex
、cargo add lazy_static
命令安装
[dependencies]
error-chain = "0.12.4"
lazy_static = "1.4.0"
regex = "1.8.1"
reqwest = { version = "0.11.17", features = ["json"] }
select = "0.6.0"
tokio = { version = "1.28.0", features = ["full"] }
使用 reqwest::get
获取 MediaWiki
页面的源代码,然后使用 Regex::captures_iter
查找内部和外部链接的所有条目。使用智能指针 Cow
可以提供对借用数据的不可变引用,避免分配过多的字符串。
use lazy_static::lazy_static;
use regex::Regex;
use std::borrow::Cow;
use std::collections::HashSet;
use std::error::Error;

/// Extracts the set of unique links from MediaWiki markup.
///
/// Internal links (`[[target|label]]`) are lower-cased, which requires an
/// allocation; external links are returned as borrowed slices of `content`.
fn extract_links(content: &str) -> HashSet<Cow<str>> {
    lazy_static! {
        // Verbose (`x`) mode: a `#` comment runs to the end of the line, so
        // the pattern must stay on separate lines or the first comment would
        // swallow the external-link alternative. Whitespace is ignored even
        // inside character classes, hence `\x20` for a literal space.
        static ref WIKI_REGEX: Regex = Regex::new(
            r"(?x)
                \[\[(?P<internal>[^\[\]|]*)[^\[\]]*\]\]        # internal links
                |
                (url=|URL\||\[)(?P<external>http.*?)[\x20\|}]  # external links
            "
        )
        .unwrap();
    }

    let links: HashSet<_> = WIKI_REGEX
        .captures_iter(content)
        .map(|c| match (c.name("internal"), c.name("external")) {
            (Some(val), None) => Cow::from(val.as_str().to_lowercase()),
            (None, Some(val)) => Cow::from(val.as_str()),
            // Exactly one branch of the alternation participates in a match.
            _ => unreachable!(),
        })
        .collect();

    links
}

/// Downloads the raw wiki source of the Rust article and prints its links.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let content = reqwest::get(
        "https://en.wikipedia.org/w/index.php?title=Rust_(programming_language)&action=raw",
    )
    .await?
    .text()
    .await?;

    println!("{:#?}", extract_links(content.as_str()));

    Ok(())
}
{ "firewall (computing)" , "conditional (computer programming)" , "algebraic data types" , ... "affine type system" , "reserved word" , }
2. URL
2.1 解析 URL
字符串为 Url
类型
url
crate 中的 parse
方法验证并解析 &str
切片为 Url
结构体。如果输入字符串的格式不正确,解析方法 parse
会返回 Result<Url, ParseError>
。一旦 URL
被解析,它就可以使用 Url
结构体类型中的所有方法。
use url::{ParseError, Url};

/// Parses a hard-coded URL string and prints its path component.
fn main() -> Result<(), ParseError> {
    let input = "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open";

    // Parsing validates the string; malformed input is returned as `ParseError`.
    let url = Url::parse(input)?;
    let path = url.path();
    println!("URL 的路径部分是: {}", path);

    Ok(())
}
URL 的路径部分是: /rust-lang/rust/issues
2.2 通过移除路径段创建基本 URL
基本 URL
(base URL)包括协议和域名。但基本 URL
(base URL)不包括目录、文件或查询字符串,这些项都可以从给定的 URL
中剥离出来。创建基本 URL
(base URL)时,通过 PathSegmentsMut::clear
方法移除目录和文件路径,通过方法 Url::set_query
移除查询字符串。
use error_chain::error_chain;use url::Url;error_chain! { foreign_links { UrlParse (url::ParseError); } errors { CannotBeABase } } fn main () -> Result <()> { let full = "https://github.com/rust-lang/cargo?asdf" ; let url = Url::parse (full)?; let base = base_url (url)?; assert_eq! (base.as_str (), "https://github.com/" ); println! ("基础URL是: {}" , base); Ok (()) } fn base_url (mut url: Url) -> Result <Url> { match url.path_segments_mut () { Ok (mut path) => { path.clear (); } Err (_) => { return Err (Error::from_kind (ErrorKind::CannotBeABase)); } } url.set_query (None ); Ok (url) }
基础URL是: https://github.com/
2.3 从基本 URL
创建新 URLs
join
方法从基路径和相对路径创建新的 URL
。
use url::{ParseError, Url};

/// Joins a repository path onto the GitHub base URL and prints the result.
fn main() -> Result<(), ParseError> {
    let joined = build_github_url("/rust-lang/cargo")?;

    assert_eq!(joined.as_str(), "https://github.com/rust-lang/cargo");
    println!("加入的网址是: {}", joined);

    Ok(())
}

/// Resolves `path` against `https://github.com`.
///
/// # Errors
///
/// Returns `ParseError` if `path` cannot be joined onto the base URL.
fn build_github_url(path: &str) -> Result<Url, ParseError> {
    const GITHUB: &str = "https://github.com";

    Url::parse(GITHUB)
        .expect("已知硬编码 URL 是有效的")
        .join(path)
}
加入的网址是: https://github.com/rust-lang/cargo
2.4 提取 URL
源(scheme/host/port
)
Url
结构体定义了多种方法,以便于提取有关它所表示的 URL
的信息。
use url::{Host, ParseError, Url};

/// Parses an FTP URL and verifies its scheme, host and default port.
fn main() -> Result<(), ParseError> {
    let url = Url::parse("ftp://rust-lang.org/examples")?;

    let scheme = url.scheme();
    let host = url.host();
    // No explicit port in the URL, so the scheme's well-known default applies.
    let port = url.port_or_known_default();

    assert_eq!(scheme, "ftp");
    assert_eq!(host, Some(Host::Domain("rust-lang.org")));
    assert_eq!(port, Some(21));
    println!("出处果然不出所料!");

    Ok(())
}
use error_chain::error_chain;
use url::{Host, Origin, Url};

error_chain! {
    foreign_links {
        UrlParse(url::ParseError);
    }
}

/// Parses an FTP URL and verifies that `Url::origin` yields the expected
/// (scheme, host, port) tuple.
fn main() -> Result<()> {
    let url = Url::parse("ftp://rust-lang.org/examples")?;

    let expected = Origin::Tuple(
        String::from("ftp"),
        Host::Domain(String::from("rust-lang.org")),
        21,
    );

    assert_eq!(url.origin(), expected);
    println!("出处果然不出所料!");

    Ok(())
}
2.5 从 URL
移除片段标识符和查询对
解析 Url
结构体,并使用 url::Position
枚举对其进行切片,以去除不需要的 URL
片段。
use url::{ParseError, Position, Url};

/// Prints a URL with everything after its path (query string and fragment)
/// removed.
fn main() -> Result<(), ParseError> {
    let url = Url::parse("https://github.com/rust-lang/rust/issues?labels=E-easy&state=open")?;

    // Slicing by `Position` keeps the text up to and including the path.
    let without_query: &str = &url[..Position::AfterPath];
    println!("清理后的网址: {}", without_query);

    Ok(())
}
清理后的网址: https://github.com/rust-lang/rust/issues
3. 媒介(MIME
)类型
需要安装mime
库,可通过cargo add mime
命令安装
[dependencies] mime = "0.3.17"
3.1 从字符串获取 MIME
类型
下面的实例展示如何使用 mime
crate 从字符串解析出 MIME
类型。FromStrError
结构体在 unwrap_or
子句中生成默认的 MIME
类型。
use mime::{Mime, APPLICATION_OCTET_STREAM};

/// Parses one invalid and one valid MIME string, substituting
/// `application/octet-stream` whenever parsing fails.
fn main() {
    // Shared fallback logic for both inputs.
    let parse_or_default =
        |raw: &str| -> Mime { raw.parse::<Mime>().unwrap_or(APPLICATION_OCTET_STREAM) };

    let invalid_mime_type = "i n v a l i d";
    let default_mime = parse_or_default(invalid_mime_type);
    println!(
        "{:?} 的 MIME 使用默认值 {:?}",
        invalid_mime_type, default_mime
    );

    let valid_mime_type = "TEXT/PLAIN";
    let parsed_mime = parse_or_default(valid_mime_type);
    println!(
        "{:?} 的 MIME 被解析为 {:?}",
        valid_mime_type, parsed_mime
    );
}
"i n v a l i d" 的 MIME 使用默认值 "application/octet-stream" "TEXT/PLAIN" 的 MIME 被解析为 "text/plain"
3.2 从文件名获取 MIME
类型
实例展示如何使用 mime
crate 从给定的文件名返回正确的 MIME
类型。程序将检查文件扩展名并与已知的 MIME
类型列表匹配,返回值为 mime::Mime
。
use mime::Mime;

/// Guesses the MIME type of `filename` from its extension.
///
/// The extension is the text after the last `.` and is compared
/// case-insensitively. Names without a dot, and unknown extensions, map to
/// `text/plain`.
///
/// Takes `&str` rather than `&String`; existing callers passing `&String`
/// still compile through deref coercion.
fn find_mimetype(filename: &str) -> Mime {
    // `rsplit_once` yields `None` for names without a dot, so a file that is
    // literally called "png" is no longer mistaken for a PNG image.
    let extension = filename
        .rsplit_once('.')
        .map(|(_, ext)| ext.to_ascii_lowercase());

    match extension.as_deref() {
        Some("png") => mime::IMAGE_PNG,
        Some("jpg") => mime::IMAGE_JPEG,
        Some("json") => mime::APPLICATION_JSON,
        _ => mime::TEXT_PLAIN,
    }
}

/// Prints the guessed MIME type for a few sample file names.
fn main() {
    let filenames = ["foobar.jpg", "foo.bar", "foobar.png"];
    for file in filenames {
        let mime = find_mimetype(file);
        println!("{} 文件的 MIME类型是:{}", file, mime);
    }
}
foobar.jpg 文件的 MIME类型是:image/jpeg foo.bar 文件的 MIME类型是:text/plain foobar.png 文件的 MIME类型是:image/png
3.3 解析 HTTP
响应的 MIME
类型
当从 reqwest
接收到 HTTP
响应时,MIME
类型或媒体类型可以在实体头部的 Content-Type
标头中找到。reqwest::header::HeaderMap::get
方法将标头检索为结构体 reqwest::header::HeaderValue
,结构体可以转换为字符串。然后 mime
crate 可以解析它,生成 mime::Mime
值。mime
crate 也定义了一些常用的 MIME
类型。
use error_chain::error_chain;
use mime::Mime;
use reqwest::header::CONTENT_TYPE;
use std::str::FromStr;

error_chain! {
    foreign_links {
        Reqwest(reqwest::Error);
        Header(reqwest::header::ToStrError);
        Mime(mime::FromStrError);
    }
}

/// Requests an image and describes the media type announced in the
/// response's `Content-Type` header.
#[tokio::main]
async fn main() -> Result<()> {
    let response = reqwest::get("https://www.rust-lang.org/logos/rust-logo-32x32.png").await?;

    if let Some(raw_value) = response.headers().get(CONTENT_TYPE) {
        // Header value -> &str -> Mime; either step may fail.
        let parsed = Mime::from_str(raw_value.to_str()?)?;
        let description = match (parsed.type_(), parsed.subtype()) {
            (mime::TEXT, mime::HTML) => "一个 HTML 文件",
            (mime::TEXT, _) => "文本文档",
            (mime::IMAGE, mime::PNG) => "一张PNG图片",
            (mime::IMAGE, _) => "一个图像",
            _ => "既不是文字也不是图像",
        };
        println!("响应包含: {}.", description);
    } else {
        println!("响应不包含 Content-Type 标头.");
    }

    Ok(())
}
4. 客户端
需要安装hyper
库和重新安装reqwest
库,可通过cargo add hyper
和cargo add reqwest --features blocking
命令安装
[dependencies] hyper = "0.14.26" reqwest = { version = "0.11.17" , features = ["blocking" ] }
4.1 请求处理
4.1.1 发出 HTTP GET
请求
解析提供的 URL,并使用 reqwest::blocking::get
发起同步 HTTP GET
请求。打印获取的响应消息状态和标头 reqwest::blocking::Response
。使用 read_to_string
将 HTTP
响应消息主体正文读入到指派的字符串 String
。
use error_chain::error_chain;
use std::io::Read;

error_chain! {
    foreign_links {
        Io(std::io::Error);
        HttpRequest(reqwest::Error);
    }
}

/// Performs a blocking GET request and prints the response status, headers
/// and body.
fn main() -> Result<()> {
    let mut response = reqwest::blocking::get("http://httpbin.org/get")?;

    // The blocking response implements `Read`, so the body is drained into a
    // `String` before the status and headers are printed.
    let mut text = String::new();
    response.read_to_string(&mut text)?;

    let status = response.status();
    let headers = response.headers();
    println!("状态: {}", status);
    println!("标头:\n{:#?}", headers);
    println!("正文:\n{}", text);

    Ok(())
}
状态: 200 OK 标头: { "date" : "Sun, 30 Apr 2023 07:51:57 GMT" , "content-type" : "application/json" , "content-length" : "221" , "connection" : "keep-alive" , "server" : "gunicorn/19.9.0" , "access-control-allow-origin" : "*" , "access-control-allow-credentials" : "true" , } 正文: { "args" : {}, "headers" : { "Accept" : "*/*" , "Host" : "httpbin.org" , "X-Amzn-Trace-Id" : "Root=1-644e1e1d-0d67becf0590a0c6524831ff" }, "origin" : "192.168.1.1" , "url" : "http://httpbin.org/get" }
常见的方法是通过包含 tokio
在内的类似异步执行器,使主函数执行异步,但检索处理相同的信息。本实例中,tokio::main
处理所有繁重的执行器设置,并允许在 .await
之前不阻塞的按顺序执行代码。也可以使用 reqwest
的异步版本,其请求函数 reqwest::get
和响应结构体 reqwest::Response
都是异步的。
use error_chain::error_chain;

error_chain! {
    foreign_links {
        Io(std::io::Error);
        HttpRequest(reqwest::Error);
    }
}

/// Performs an asynchronous GET request and prints the response status,
/// headers and body.
#[tokio::main]
async fn main() -> Result<()> {
    let response = reqwest::get("http://httpbin.org/get").await?;

    // Status and headers must be printed first: `text()` consumes the response.
    let status = response.status();
    println!("状态: {}", status);
    println!("标头:\n{:#?}", response.headers());

    let text = response.text().await?;
    println!("正文:\n{}", text);

    Ok(())
}
状态: 200 OK 标头: { "date" : "Sun, 30 Apr 2023 07:57:31 GMT" , "content-type" : "application/json" , "content-length" : "221" , "connection" : "keep-alive" , "server" : "gunicorn/19.9.0" , "access-control-allow-origin" : "*" , "access-control-allow-credentials" : "true" , } 正文: { "args" : {}, "headers" : { "Accept" : "*/*" , "Host" : "httpbin.org" , "X-Amzn-Trace-Id" : "Root=1-644e1f6b-088641ec6e919c6542c9c239" }, "origin" : "192.168.1.1" , "url" : "http://httpbin.org/get" }
4.1.2 为 REST
请求设置自定义消息标头和 URL
参数
需要安装serde
库,可通过cargo add serde --features derive
命令安装
[dependencies]
hyper = "0.14.26"
reqwest = { version = "0.11.17", features = ["blocking", "json"] }
serde = { version = "1.0.160", features = ["derive"] }
实例中为 HTTP GET
请求设置标准的和自定义的 HTTP
消息标头以及 URL
参数。自定义消息标头 X-Powered-By
直接以字符串名称传入,无需再使用旧版 hyper::header!
宏定义 XPoweredBy
类型。使用 Url::parse_with_params
构建复杂的 URL。使用 RequestBuilder::header
方法设置标准消息标头 header::USER_AGENT
、header::AUTHORIZATION
,以及自定义标头 X-Powered-By
,然后使用 RequestBuilder::send
发起请求。请求的服务目标为 http://httpbin.org/headers
,其响应结果是包含所有请求的消息标头的 JSON
字典,易于验证。
use error_chain::error_chain;
use serde::Deserialize;
use reqwest::blocking::Client;
use reqwest::header::{AUTHORIZATION, USER_AGENT};
use std::collections::HashMap;
use url::Url;

/// JSON body returned by `httpbin.org/headers`: an echo of the request headers.
#[derive(Deserialize, Debug)]
pub struct HeadersEcho {
    pub headers: HashMap<String, String>,
}

error_chain! {
    foreign_links {
        Reqwest(reqwest::Error);
        UrlParse(url::ParseError);
    }
}

/// Sends a GET request carrying standard and custom headers plus URL query
/// parameters, then checks that the server echoed all of them back.
fn main() -> Result<()> {
    let query = [("lang", "rust"), ("browser", "servo")];
    let target = Url::parse_with_params("http://httpbin.org/headers", &query)?;

    let client = Client::new();
    let bearer = format!("Bearer {}", "DEadBEEfc001cAFeEDEcafBAd");
    let response = client
        .get(target)
        .header(USER_AGENT, "Rust-test")
        .header(AUTHORIZATION, bearer)
        .header("X-Powered-By", "Guybrush Threepwood")
        .send()?;

    // Capture the final URL before `json()` consumes the response.
    let final_url = response.url().to_string();
    let echoed: HeadersEcho = response.json()?;

    assert_eq!(
        echoed.headers["Authorization"],
        "Bearer DEadBEEfc001cAFeEDEcafBAd"
    );
    assert_eq!(echoed.headers["User-Agent"], "Rust-test");
    assert_eq!(echoed.headers["X-Powered-By"], "Guybrush Threepwood");
    assert_eq!(
        final_url,
        "http://httpbin.org/headers?lang=rust&browser=servo"
    );

    println!("{:?}", echoed);

    Ok(())
}
HeadersEcho { headers: {"Host" : "httpbin.org" , "User-Agent" : "Rust-test" , "X-Amzn-Trace-Id" : "Root=1-644f6d07-335fc12037d78830043aa2aa" , "Authorization" : "Bearer DEadBEEfc001cAFeEDEcafBAd" , "Accept" : "*/*" , "X-Powered-By" : "Guybrush Threepwood" } }
4.2 Web API
调用
4.2.1 查询 GitHub API
使用 reqwest::get
查询 点赞的用户 API v3
,以获取某个 GitHub
项目的所有点赞用户的列表。使用 Response::json
将响应信息 reqwest::Response
反序列化为实现了 serde::Deserialize
trait 的 User 对象。tokio::main
用于设置异步执行器,该进程异步等待 reqwest::get
完成,然后将响应信息反序列化到用户实例中。
use reqwest::Error;use serde::Deserialize;#[derive(Deserialize, Debug)] struct User { login: String , id: u64 , } #[tokio::main] async fn main () -> Result <(), Error> { let request_url = format! ( "https://api.github.com/repos/{owner}/{repo}/stargazers" , owner = "rust-lang-nursery" , repo = "rust-cookbook" ); println! ("{}" , request_url); let client = reqwest::Client::new (); let response = client .get (&request_url) .header (reqwest::header::USER_AGENT, "My Rust App" ) .send () .await ?; let users : Vec <User> = response.json ().await ?; println! ("{:?}" , users); Ok (()) }
https://api.github.com/repos/rust-lang-nursery/rust-cookbook/stargazers [User { login: "trentspi" , id : 20845184 }, User { login: "charlesetc" , id : 5167293 }, User { login: "Ruin0x11" , id : 6700637 }, User { login: "gmcabrita" , id : 957820 }, User { login: "cnwalker" , id : 8570077 }, User { login: "k0pernicus" , id : 3605451 }, User { login: "jaxx" , id : 723258 }, User { login: "dhharris" , id : 9009622 }, User { login: "zhangsoledad" , id : 3198439 }, User { login: "ssebastianj" , id : 309535 }, User { login: "oclbdk" , id : 136982 }, User { login: "Latrasis" , id : 4656227 }, User { login: "narendasan" , id : 1790613 }, User { login: "rishabh92" , id : 13951936 }, User { login: "hueftl" , id : 11706301 }, User { login: "seeekr" , id : 302886 }, User { login: "krzyk" , id : 105730 }, User { login: "rjammala" , id : 4990663 }, User { login: "DaseinPhaos" , id : 11028753 }, User { login: "jryans" , id : 279572 }, User { login: "burdges" , id : 680126 }, User { login: "zaa" , id : 5245 }, User { login: "DenisKolodin" , id : 418920 }, User { login: "messense" , id : 1556054 }, User { login: "wdv4758h" , id : 2716047 }, User { login: "iblislin" , id : 761623 }, User { login: "realityone" , id : 4059040 }, User { login: "sebasmagri" , id : 11137 }, User { login: "sourcepirate" , id : 5940286 }, User { login: "king6cong" , id : 302560 }]
4.2.2 检查 API
资源是否存在
消息标头 HEAD 请求(Client::head
)查询 GitHub 用户端接口,然后检查响应代码以确定是否成功。这是一种无需接收 HTTP 响应消息主体,即可快速查询 rest 资源的方法。使用 ClientBuilder::timeout
方法配置的 reqwest::Client
结构体确保请求不会无限期挂起:超过设定的超时时间(本例为 5 秒)即返回错误。由于 ClientBuilder::build
和 RequestBuilder::send
都返回错误类型 reqwest::Error
,所以便捷的 reqwest::Result
类型被用于主函数的返回类型。
use reqwest::ClientBuilder;
use reqwest::Result;
use std::time::Duration;

/// Sends a HEAD request for a GitHub user and reports whether the user
/// exists, judged by the response status.
#[tokio::main]
async fn main() -> Result<()> {
    let user = "ferris-the-crab";
    let endpoint = format!("https://api.github.com/users/{}", user);
    println!("{}", endpoint);

    // Give up on the request after five seconds.
    let client = ClientBuilder::new()
        .timeout(Duration::from_secs(5))
        .build()?;

    let status = client
        .head(&endpoint)
        .header(reqwest::header::USER_AGENT, "My Rust App")
        .send()
        .await?
        .status();

    match status.is_success() {
        true => println!("{} 是一个用户!", user),
        false => println!("{} 不是一个用户!", user),
    }

    Ok(())
}
https://api.github.com/users/ferris-the-crab ferris-the-crab 不是一个用户!
4.2.3 使用 GitHub API
创建和删除 Gist
需要安装serde_json
库,可通过cargo add serde_json
命令安装
[dependencies] serde_json = "1.0.96"
使用 Client::post
创建一个 POST
请求提交到 GitHub gists API v3
接口的 gist,并使用 Client::delete
使用 DELETE
请求删除它。reqwest::Client
负责这两个请求的详细信息,包括:URL、消息体(body)和身份验证。serde_json::json!
宏的 POST
主体可以提供任意形式的 JSON
主体,通过调用 RequestBuilder::json
设置请求主体,RequestBuilder::basic_auth
处理身份验证。本实例中调用 RequestBuilder::send
方法异步发送请求,并通过 .await 等待其完成(实例中使用 HTTP
基本认证 为了授权访问 GitHub API
)。
use error_chain::error_chain;
use reqwest::Client;
use serde::Deserialize;
use serde_json::json;
use std::env;

error_chain! {
    foreign_links {
        EnvVar(env::VarError);
        HttpRequest(reqwest::Error);
    }
}

/// The subset of the gist object returned by the GitHub API that is used here.
#[derive(Deserialize, Debug)]
struct Gist {
    id: String,
    html_url: String,
}

/// Creates a public gist with a POST request and then removes it again with
/// a DELETE request, authenticating via HTTP basic auth.
///
/// Credentials are read from the `GH_USER` and `GH_PASS` environment variables.
#[tokio::main]
async fn main() -> Result<()> {
    let gh_user = env::var("GH_USER")?;
    let gh_pass = env::var("GH_PASS")?;

    let gist_body = json!({
        "description": "the description for this gist",
        "public": true,
        "files": {
            "main.rs": {
                "content": r#"fn main() { println!("hello world!");}"#
            }
        }
    });

    // Step 1: create the gist.
    let gists_url = "https://api.github.com/gists";
    let create_response = Client::new()
        .post(gists_url)
        .basic_auth(&gh_user, Some(&gh_pass))
        .json(&gist_body)
        .header(reqwest::header::USER_AGENT, "My Rust App")
        .send()
        .await?;

    let gist: Gist = create_response.json().await?;
    println!("创建 {:?}", gist);

    // Step 2: delete it again, addressing it by the id returned above.
    let gist_url = format!("{}/{}", gists_url, gist.id);
    let delete_response = Client::new()
        .delete(&gist_url)
        .basic_auth(&gh_user, Some(&gh_pass))
        .header(reqwest::header::USER_AGENT, "My Rust App")
        .send()
        .await?;

    println!(
        "Gist {} 已删除! 状态码:{}",
        gist.id,
        delete_response.status()
    );

    Ok(())
}
export GH_USER=xxxxx export GH_PASS=ghp_*******************
创建 Gist { id : "d30b7ea925c570aaa59d22af83bd3f1a" , html_url: "https://gist.github.com/xxxxx/d30b7ea925c570aaa59d22af83bd3f1a" } Gist d30b7ea925c570aaa59d22af83bd3f1a 已删除! 状态码:204 No Content
4.2.4 使用 RESTful API
分页
可以将分页的 web API
方便地包裹在 Rust 迭代器中,当到达每一页的末尾时,迭代器会从远程服务器加载下一页结果。
use reqwest::Result;
use serde::Deserialize;

/// One page of the crates.io reverse-dependencies response.
#[derive(Deserialize)]
struct ApiResponse {
    dependencies: Vec<Dependency>,
    meta: Meta,
}

/// A single reverse-dependency entry.
#[derive(Deserialize)]
struct Dependency {
    crate_id: String,
}

/// Paging metadata carrying the total number of entries.
#[derive(Deserialize)]
struct Meta {
    total: u32,
}

/// Iterator over all reverse dependencies of a crate that fetches further
/// pages from crates.io on demand.
struct ReverseDependencies {
    crate_id: String,
    dependencies: std::vec::IntoIter<Dependency>,
    client: reqwest::blocking::Client,
    page: u32,
    per_page: u32,
    total: u32,
}

impl ReverseDependencies {
    /// Creates an iterator for `crate_id`; no request is made until the
    /// first item is pulled.
    fn of(crate_id: &str) -> Result<Self> {
        let initial = ReverseDependencies {
            crate_id: crate_id.to_owned(),
            dependencies: Vec::new().into_iter(),
            client: reqwest::blocking::Client::new(),
            page: 0,
            per_page: 100,
            total: 0,
        };
        Ok(initial)
    }

    /// Returns the next dependency, loading the next page when the current
    /// one is used up. `Ok(None)` signals the end of the listing.
    fn try_next(&mut self) -> Result<Option<Dependency>> {
        // Serve from the page already held in memory whenever possible.
        if let Some(dep) = self.dependencies.next() {
            return Ok(Some(dep));
        }

        // At least one page was fetched and it covered the reported total.
        let exhausted = self.page > 0 && self.page * self.per_page >= self.total;
        if exhausted {
            return Ok(None);
        }

        self.page += 1;
        let url = format!(
            "https://crates.io/api/v1/crates/{}/reverse_dependencies?page={}&per_page={}",
            self.crate_id, self.page, self.per_page
        );
        println!("{}", url);

        let ApiResponse { dependencies, meta } = self
            .client
            .get(&url)
            .header(reqwest::header::USER_AGENT, "My Rust App")
            .send()?
            .json::<ApiResponse>()?;

        self.dependencies = dependencies.into_iter();
        self.total = meta.total;
        println!("{}", self.total);

        Ok(self.dependencies.next())
    }
}

impl Iterator for ReverseDependencies {
    type Item = Result<Dependency>;

    /// Adapts `try_next` to the iterator protocol: errors are yielded as
    /// items and `Ok(None)` ends the iteration.
    fn next(&mut self) -> Option<Self::Item> {
        self.try_next().transpose()
    }
}

/// Prints every reverse dependency of `serde`.
fn main() -> Result<()> {
    let reverse_deps = ReverseDependencies::of("serde")?;
    for entry in reverse_deps {
        let dep = entry?;
        println!("反向依赖: {}", dep.crate_id);
    }
    Ok(())
}
https://crates.io/api/v1/crates/serde/reverse_dependencies?page=1&per_page=100 26183 反向依赖: serde 反向依赖: serde ... https://crates.io/api/v1/crates/serde/reverse_dependencies?page=2&per_page=100 26183 反向依赖: serde 反向依赖: serde ...
4.2.5 处理速率受限 API
实例使用 GitHub API - 速率限制展示如何处理远程服务器错误。本实例通过 HeaderMap::get 读取并解析 X-RateLimit-* 响应头,并检查 reqwest::StatusCode::FORBIDDEN
。如果响应超过速率限制,则将等待并重试。
use error_chain::error_chain;
use reqwest::StatusCode;
use std::thread;
use std::time::{Duration, UNIX_EPOCH};

error_chain! {
    foreign_links {
        Io(std::io::Error);
        Time(std::time::SystemTimeError);
        Reqwest(reqwest::Error);
    }
}

/// Reads the response header `name` and parses its value as a `u64`.
///
/// # Errors
///
/// Fails when the header is missing, is not valid text, or is not a number.
fn rate_header(response: &reqwest::blocking::Response, name: &str) -> Result<u64> {
    let value = response
        .headers()
        .get(name)
        .ok_or_else(|| format!("响应不包含预期的 {} 标头", name))?;

    let number = value
        .to_str()
        .ok()
        .and_then(|text| text.parse::<u64>().ok())
        .ok_or_else(|| format!("无法解析 {} 标头值", name))?;

    Ok(number)
}

/// Queries the GitHub API and, when the rate limit is exhausted, sleeps
/// until the limit resets and then retries.
fn main() -> Result<()> {
    // No trailing whitespace in the URL, and one client reused across retries.
    let url = "https://api.github.com/users/rust-lang-nursery";
    let client = reqwest::blocking::Client::new();

    loop {
        let response = client
            .get(url)
            .header(reqwest::header::USER_AGENT, "My Rust App")
            .send()?;

        let rate_limit = rate_header(&response, "X-RateLimit-Limit")?;
        let rate_remaining = rate_header(&response, "X-RateLimit-Remaining")?;
        let rate_reset_at = rate_header(&response, "X-RateLimit-Reset")?;

        // `saturating_sub` yields zero instead of panicking when the reset
        // timestamp already lies in the past.
        let rate_reset_within =
            Duration::from_secs(rate_reset_at).saturating_sub(UNIX_EPOCH.elapsed()?);

        if response.status() == StatusCode::FORBIDDEN && rate_remaining == 0 {
            println!("休眠 {} 秒。", rate_reset_within.as_secs());
            thread::sleep(rate_reset_within);
            // Retry in the same loop rather than recursing into `main`.
            continue;
        }

        println!(
            "速率限制当前为 {}/{},此限制的重置将在 {} 秒内完成。",
            rate_remaining,
            rate_limit,
            rate_reset_within.as_secs(),
        );
        break;
    }

    Ok(())
}
速率限制当前为 56/60,此限制的重置将在 1069 秒内完成。
4.3 下载
4.3.1 下载文件到临时目录
需要安装tempfile
库,可通过cargo add tempfile
命令安装
[dependencies] tempfile = "3.5.0"
使用 tempfile::Builder
创建一个临时目录,并使用 reqwest::get
通过 HTTP 协议异步下载文件。从 Response::url
的最后一个路径段获取文件名,并在 tempdir()
创建的临时目录中使用 File
结构体创建目标文件,并使用 io::copy
将下载的数据复制到文件中。程序退出时,会自动删除临时目录。
use error_chain::error_chain;use std::fs::File;use std::io::copy;use tempfile::Builder;error_chain! { foreign_links { Io (std::io::Error); HttpRequest (reqwest::Error); } } #[tokio::main] async fn main () -> Result <()> { let tmp_dir = Builder::new ().prefix ("example" ).tempdir ()?; let target = "https://www.rust-lang.org/logos/rust-logo-512x512.png" ; let response = reqwest::get (target).await ?; let mut dest = { let fname = response .url () .path_segments () .and_then (|segments| segments.last ()) .and_then (|name| if name.is_empty () { None } else { Some (name) }) .unwrap_or ("tmp.bin" ); println! ("要下载的文件:“{}”" , fname); let fname = tmp_dir.path ().join (fname); println! ("将位于:'{:?}'" , fname); File::create (fname)? }; let content = response.text ().await ?; copy (&mut content.as_bytes (), &mut dest)?; Ok (()) }
要下载的文件:“rust-logo-512x512.png” 将位于:'"/tmp/examplelAjEDw/rust-logo-512x512.png"'
4.3.2 使用 HTTP range
请求头进行部分下载
使用 reqwest::blocking::Client::head
获取响应的消息主体的大小(即消息主体内容长度)。然后,使用 reqwest::blocking::Client::get
下载(总计102400字节内容,单次下载 10240 字节的内容),同时打印进度消息。本实例使用同步的 reqwest
模块,消息范围标头指定响应的消息块大小和位置。
use error_chain::error_chain;use reqwest::header::{HeaderValue, CONTENT_LENGTH, RANGE};use reqwest::StatusCode;use std::fs::File;use std::str ::FromStr;error_chain! { foreign_links { Io (std::io::Error); Reqwest (reqwest::Error); Header (reqwest::header::ToStrError); } } struct PartialRangeIter { start: u64 , end: u64 , buffer_size: u32 , } impl PartialRangeIter { pub fn new (start: u64 , end: u64 , buffer_size: u32 ) -> Result <Self > { if buffer_size == 0 { Err ("无效的 buffer_size,给出一个大于零的值。" )?; } Ok (PartialRangeIter { start, end, buffer_size, }) } } impl Iterator for PartialRangeIter { type Item = HeaderValue; fn next (&mut self ) -> Option <Self ::Item> { if self .start > self .end { None } else { let prev_start = self .start; self .start += std::cmp::min (self .buffer_size as u64 , self .end - self .start + 1 ); Some ( HeaderValue::from_str (&format! ("bytes={}-{}" , prev_start, self .start - 1 )) .expect ("提供格式化的字符串!" ), ) } } } fn main () -> Result <()> { let url = "https://httpbin.org/range/102400?duration=2" ; const CHUNK_SIZE: u32 = 10240 ; let client = reqwest::blocking::Client::new (); let response = client.head (url).send ()?; let length = response .headers () .get (CONTENT_LENGTH) .ok_or ("响应不包括内容长度" )?; let length = u64 ::from_str (length.to_str ()?).map_err (|_| "无效的 Content-Length 标头" )?; let mut output_file = File::create ("download.bin" )?; println! ("开始下载..." ); for range in PartialRangeIter::new (0 , length - 1 , CHUNK_SIZE)? { println! ("范围 {:?}" , range); let mut response = client.get (url).header (RANGE, range).send ()?; let status = response.status (); if !(status == StatusCode::OK || status == StatusCode::PARTIAL_CONTENT) { error_chain::bail!("意外的服务器响应: {}" , status) } std::io::copy (&mut response, &mut output_file)?; } let content = response.text ()?; std::io::copy (&mut content.as_bytes (), &mut output_file)?; println! ("成功完成下载!" ); Ok (()) }
开始下载... 范围 "bytes=0-10239" 范围 "bytes=10240-20479" 范围 "bytes=20480-30719" 范围 "bytes=30720-40959" 范围 "bytes=40960-51199" 范围 "bytes=51200-61439" 范围 "bytes=61440-71679" 范围 "bytes=71680-81919" 范围 "bytes=81920-92159" 范围 "bytes=92160-102399" 成功完成下载!
4.3.3 POST 文件到 paste-rs
使用 reqwest::Client
建立与 https://paste.rs
的连接,遵循 reqwest::RequestBuilder
结构体模式。调用 Client::post
方法,以 URL
为参数连接目标,RequestBuilder::body
通过读取文件设置要发送的内容,RequestBuilder::send
方法在文件上传过程中将一直阻塞,直到返回响应消息。最后,read_to_string
返回响应消息并显示在控制台中。
use error_chain::error_chain;
use std::fs::File;
use std::io::Read;

error_chain! {
    foreign_links {
        HttpRequest(reqwest::Error);
        IoError(::std::io::Error);
    }
}

/// Uploads the contents of the local file `message` to paste.rs and prints
/// the URL returned by the service.
#[tokio::main]
async fn main() -> Result<()> {
    // Load the whole file into memory; it becomes the request body.
    let mut payload = String::new();
    File::open("message")?.read_to_string(&mut payload)?;

    let endpoint = "https://paste.rs";
    let response = reqwest::Client::new()
        .post(endpoint)
        .body(payload)
        .send()
        .await?;

    let paste_url = response.text().await?;
    println!("您的粘贴位于: {}", paste_url);

    Ok(())
}
您的粘贴位于: https://paste.rs/NZN