diff --git a/Cargo.lock b/Cargo.lock
index 0b171f5c98..b0176dc168 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -430,7 +430,7 @@ dependencies = [
"ansi_term 0.11.0",
"atty",
"bitflags",
- "strsim 0.8.0",
+ "strsim",
"textwrap 0.11.0",
"unicode-width",
"vec_map",
@@ -533,41 +533,6 @@ dependencies = [
"syn",
]
-[[package]]
-name = "darling"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e9d6ddad5866bb2170686ed03f6839d31a76e5407d80b1c334a2c24618543ffa"
-dependencies = [
- "darling_core",
- "darling_macro",
-]
-
-[[package]]
-name = "darling_core"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a9ced1fd13dc386d5a8315899de465708cf34ee2a6d9394654515214e67bb846"
-dependencies = [
- "fnv",
- "ident_case",
- "proc-macro2",
- "quote",
- "strsim 0.10.0",
- "syn",
-]
-
-[[package]]
-name = "darling_macro"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a7a1445d54b2f9792e3b31a3e715feabbace393f38dc4ffd49d94ee9bc487d5"
-dependencies = [
- "darling_core",
- "quote",
- "syn",
-]
-
[[package]]
name = "data-encoding"
version = "2.3.2"
@@ -588,38 +553,6 @@ dependencies = [
"tokio",
]
-[[package]]
-name = "derive_builder"
-version = "0.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78ef25735c9f0d0c547d2794701600c94abf030ecb740fad1673fa64461f3573"
-dependencies = [
- "derive_builder_core",
- "derive_builder_macro",
-]
-
-[[package]]
-name = "derive_builder_core"
-version = "0.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3150f1e84602847b99d3eeb702487fc364f7d6c94f634e944a68fdbaea09e457"
-dependencies = [
- "darling",
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "derive_builder_macro"
-version = "0.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ca1008bddefdc08d1e734aeb27b94f384390af261b4d1a8fb51fe19c577f05c"
-dependencies = [
- "derive_builder_core",
- "syn",
-]
-
[[package]]
name = "diff"
version = "0.1.12"
@@ -1206,12 +1139,6 @@ dependencies = [
"unicase",
]
-[[package]]
-name = "ident_case"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
-
[[package]]
name = "idna"
version = "0.2.2"
@@ -1425,7 +1352,6 @@ version = "0.6.0"
dependencies = [
"check-if-email-exists",
"deadpool",
- "derive_builder",
"doc-comment",
"fast_chemail",
"glob",
@@ -1444,6 +1370,7 @@ dependencies = [
"shellexpand",
"tempfile",
"tokio",
+ "typed-builder",
"url",
"wiremock",
]
@@ -2437,12 +2364,6 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
-[[package]]
-name = "strsim"
-version = "0.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
-
[[package]]
name = "structopt"
version = "0.3.21"
@@ -2735,6 +2656,17 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"
+[[package]]
+name = "typed-builder"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "345426c7406aa355b60c5007c79a2d1f5b605540072795222f17f6443e6a9c6f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "typenum"
version = "1.13.0"
diff --git a/README.md b/README.md
index 68db9c975f..83f4b2c8ed 100644
--- a/README.md
+++ b/README.md
@@ -154,8 +154,8 @@ USAGE:
lychee [FLAGS] [OPTIONS] [--] [inputs]...
FLAGS:
- -E, --exclude-all-private Exclude all private IPs from checking. Equivalent to `--exclude-private --exclude-link-
- local --exclude-loopback`
+ -E, --exclude-all-private Exclude all private IPs from checking.
+ Equivalent to `--exclude-private --exclude-link-local --exclude-loopback`
--exclude-link-local Exclude link-local IP address range from checking
--exclude-loopback Exclude loopback IP address range from checking
--exclude-mail Exclude all mail addresses from checking
@@ -163,8 +163,8 @@ FLAGS:
--glob-ignore-case Ignore case when expanding filesystem path glob inputs
--help Prints help information
-i, --insecure Proceed for server connections considered insecure (invalid TLS)
- -n, --no-progress Do not show progress bar. This is recommended for non-interactive shells (e.g. for
- continuous integration)
+ -n, --no-progress Do not show progress bar.
+ This is recommended for non-interactive shells (e.g. for continuous integration)
--skip-missing Skip missing input files (default is to error if they don't exist)
-V, --version Prints version information
-v, --verbose Verbose program output
@@ -226,7 +226,7 @@ use lychee_lib::{ClientBuilder, Result, Status};
#[tokio::main]
async fn main() -> Result<()> {
- let client = ClientBuilder::default().build()?;
+ let client = ClientBuilder::default().client()?;
let response = client.check("https://github.com/lycheeverse/lychee").await?;
assert!(response.status().is_success());
Ok(())
@@ -236,7 +236,7 @@ async fn main() -> Result<()> {
The client builder is very customizable:
```rust, ignore
-let client = lychee_lib::ClientBuilder::default()
+let client = lychee_lib::ClientBuilder::builder()
.includes(includes)
.excludes(excludes)
.max_redirects(cfg.max_redirects)
@@ -249,7 +249,8 @@ let client = lychee_lib::ClientBuilder::default()
.github_token(cfg.github_token)
.scheme(cfg.scheme)
.accepted(accepted)
- .build()?;
+ .build()
+ .client()?;
```
All options that you set will be used for all link checks.
diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs
index 27f2ceb9c9..059610e802 100644
--- a/lychee-bin/src/main.rs
+++ b/lychee-bin/src/main.rs
@@ -120,7 +120,7 @@ async fn run(cfg: &Config, inputs: Vec) -> Result {
let include = RegexSet::new(&cfg.include)?;
let exclude = RegexSet::new(&cfg.exclude)?;
- let client = ClientBuilder::default()
+ let client = ClientBuilder::builder()
.includes(include)
.excludes(exclude)
.exclude_all_private(cfg.exclude_all_private)
@@ -138,6 +138,7 @@ async fn run(cfg: &Config, inputs: Vec) -> Result {
.scheme(cfg.scheme.clone())
.accepted(accepted)
.build()
+ .client()
.map_err(|e| anyhow!(e))?;
let links = collect_links(
diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs
index 63e2ca57d8..6476ce5842 100644
--- a/lychee-bin/src/options.rs
+++ b/lychee-bin/src/options.rs
@@ -118,9 +118,8 @@ pub(crate) struct Config {
pub(crate) verbose: bool,
/// Do not show progress bar.
- /// This is recommended for non-interactive shells (e.g. for continuous
- /// integration)
- #[structopt(short, long)]
+ /// This is recommended for non-interactive shells (e.g. for continuous integration)
+ #[structopt(short, long, verbatim_doc_comment)]
#[serde(default)]
pub(crate) no_progress: bool,
@@ -167,7 +166,7 @@ pub(crate) struct Config {
/// Exclude all private IPs from checking.
/// Equivalent to `--exclude-private --exclude-link-local --exclude-loopback`
- #[structopt(short = "E", long)]
+ #[structopt(short = "E", long, verbatim_doc_comment)]
#[serde(default)]
pub(crate) exclude_all_private: bool,
diff --git a/lychee-bin/src/stats.rs b/lychee-bin/src/stats.rs
index 04dc82ec30..467d0d5c82 100644
--- a/lychee-bin/src/stats.rs
+++ b/lychee-bin/src/stats.rs
@@ -144,7 +144,7 @@ mod test {
.await;
ClientBuilder::default()
- .build()
+ .client()
.unwrap()
.check(mock_server.uri())
.await
diff --git a/lychee-lib/Cargo.toml b/lychee-lib/Cargo.toml
index dee435fc0b..d92e6efbf1 100644
--- a/lychee-lib/Cargo.toml
+++ b/lychee-lib/Cargo.toml
@@ -19,7 +19,6 @@ version = "0.6.0"
[dependencies]
check-if-email-exists = "0.8.21"
deadpool = "0.7.0"
-derive_builder = "0.10.0"
fast_chemail = "0.9.6"
glob = "0.3.0"
html5ever = "0.25.1"
@@ -39,6 +38,7 @@ ring = "0.16.20"
serde = { version = "1.0.125", features = ["derive"] }
shellexpand = "2.1.0"
tokio = { version = "1.5.0", features = ["full"] }
+typed-builder = "0.9.0"
url = { version = "2.2.1", features = ["serde"] }
[dev-dependencies]
diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs
index b3661bf179..07b409a65d 100644
--- a/lychee-lib/src/client.rs
+++ b/lychee-lib/src/client.rs
@@ -1,12 +1,12 @@
#![allow(
clippy::module_name_repetitions,
clippy::struct_excessive_bools,
- clippy::default_trait_access
+ clippy::default_trait_access,
+ clippy::used_underscore_binding
)]
use std::{collections::HashSet, convert::TryFrom, time::Duration};
use check_if_email_exists::{check_email, CheckEmailInput, Reachable};
-use derive_builder::Builder;
use http::{
header::{HeaderMap, HeaderValue},
StatusCode,
@@ -15,6 +15,7 @@ use hubcaps::{Credentials, Github};
use regex::RegexSet;
use reqwest::header;
use tokio::time::sleep;
+use typed_builder::TypedBuilder;
use crate::{
filter::{Excludes, Filter, Includes},
@@ -45,11 +46,9 @@ pub struct Client {
/// A link checker using an API token for Github links
/// otherwise a normal HTTP client.
#[allow(unreachable_pub)]
-#[derive(Builder, Debug)]
-#[builder(build_fn(skip))]
-#[builder(setter(into))]
-#[builder(name = "ClientBuilder")]
-pub struct ClientBuilderInternal {
+#[derive(TypedBuilder, Debug)]
+#[builder(field_defaults(default, setter(into)))]
+pub struct ClientBuilder {
/// Set an optional Github token.
/// This allows for more requests before
/// getting rate-limited.
@@ -69,8 +68,12 @@ pub struct ClientBuilderInternal {
/// Don't check mail addresses
exclude_mail: bool,
/// Maximum number of redirects before returning error
+ #[builder(default = DEFAULT_MAX_REDIRECTS)]
max_redirects: usize,
/// User agent used for checking links
+ // Faking the user agent is necessary for some websites, unfortunately.
+ // Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
+ #[builder(default_code = "String::from(DEFAULT_USER_AGENT)")]
user_agent: String,
/// Ignore SSL errors
allow_insecure: bool,
@@ -83,6 +86,7 @@ pub struct ClientBuilderInternal {
/// on some websites.
custom_headers: HeaderMap,
/// Request method (e.g. `GET` or `HEAD`)
+ #[builder(default = reqwest::Method::GET)]
method: reqwest::Method,
/// Set of accepted return codes / status codes
accepted: Option>,
@@ -90,73 +94,62 @@ pub struct ClientBuilderInternal {
timeout: Option,
}
-impl ClientBuilder {
- fn build_excludes(&self) -> Excludes {
- // exclude_all_private option turns on all "private" excludes,
- // including private IPs, link-local IPs and loopback IPs
- let exclude_all_private = matches!(self.exclude_all_private, Some(true));
- let enable_exclude = |opt| exclude_all_private || matches!(opt, Some(true));
-
- Excludes {
- regex: self.excludes.clone().unwrap_or_default(),
- private_ips: enable_exclude(self.exclude_private_ips),
- link_local_ips: enable_exclude(self.exclude_link_local_ips),
- loopback_ips: enable_exclude(self.exclude_loopback_ips),
- mail: self.exclude_mail.unwrap_or_default(),
- }
+impl Default for ClientBuilder {
+ fn default() -> Self {
+ Self::builder().build()
}
+}
- fn build_includes(&self) -> Includes {
- let regex = self.includes.clone().flatten();
- Includes { regex }
+impl ClientBuilder {
+ /// Builds the URI [`Filter`] from this builder's include/exclude/scheme settings.
+ fn build_filter(&self) -> Filter {
+ let includes = self.includes.clone().map(|regex| Includes { regex });
+ let excludes = self.excludes.clone().map(|regex| Excludes { regex });
+ let scheme = self.scheme.clone().map(|s| s.to_lowercase());
+ Filter {
+ includes,
+ excludes,
+ scheme,
+ // exclude_all_private turns on all "private" IP excludes (private, link-local and
+ // loopback IPs); mail exclusion stays an independent option
+ exclude_private_ips: self.exclude_all_private || self.exclude_private_ips,
+ exclude_link_local_ips: self.exclude_all_private || self.exclude_link_local_ips,
+ exclude_loopback_ips: self.exclude_all_private || self.exclude_loopback_ips,
+ exclude_mail: self.exclude_mail,
+ }
+ }
/// The build method instantiates the client.
#[allow(clippy::missing_errors_doc)]
- pub fn build(&self) -> Result {
- // Faking the user agent is necessary for some websites, unfortunately.
- // Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
- let user_agent = self
- .user_agent
- .clone()
- .unwrap_or_else(|| DEFAULT_USER_AGENT.to_owned());
-
- let mut headers = self.custom_headers.clone().unwrap_or_default();
- headers.insert(header::USER_AGENT, HeaderValue::from_str(&user_agent)?);
+ pub fn client(&self) -> Result {
+ let mut headers = self.custom_headers.clone();
+ headers.insert(header::USER_AGENT, HeaderValue::from_str(&self.user_agent)?);
headers.insert(
header::TRANSFER_ENCODING,
HeaderValue::from_static("chunked"),
);
- let allow_insecure = self.allow_insecure.unwrap_or(false);
- let max_redirects = self.max_redirects.unwrap_or(DEFAULT_MAX_REDIRECTS);
-
let builder = reqwest::ClientBuilder::new()
.gzip(true)
.default_headers(headers)
- .danger_accept_invalid_certs(allow_insecure)
- .redirect(reqwest::redirect::Policy::limited(max_redirects));
-
- let timeout = self.timeout.flatten();
+ .danger_accept_invalid_certs(self.allow_insecure)
+ .redirect(reqwest::redirect::Policy::limited(self.max_redirects));
- let reqwest_client = (match timeout {
+ let reqwest_client = (match self.timeout {
Some(t) => builder.timeout(t),
None => builder,
})
.build()?;
- let github_token = match self.github_token.clone().flatten() {
- Some(token) if !token.is_empty() => {
- Some(Github::new(user_agent, Credentials::Token(token))?)
- }
+ let github_token = match self.github_token {
+ Some(ref token) if !token.is_empty() => Some(Github::new(
+ self.user_agent.clone(),
+ Credentials::Token(token.clone()),
+ )?),
_ => None,
};
- let includes = self.build_includes();
- let excludes = self.build_excludes();
- let scheme = self.scheme.clone().flatten().map(|s| s.to_lowercase());
-
- let filter = Filter::new(Some(includes), Some(excludes), scheme);
+ let filter = self.build_filter();
let quirks = Quirks::default();
@@ -164,9 +157,9 @@ impl ClientBuilder {
reqwest_client,
github_client: github_token,
filter,
+ method: self.method.clone(),
+ accepted: self.accepted.clone(),
quirks,
- method: self.method.clone().unwrap_or(reqwest::Method::GET),
- accepted: self.accepted.clone().unwrap_or_default(),
})
}
}
@@ -180,7 +173,7 @@ impl Client {
let Request { uri, source } = Request::try_from(request)?;
let status = if self.filter.is_excluded(&uri) {
Status::Excluded
- } else if uri.scheme() == "mailto" {
+ } else if uri.is_mail() {
self.check_mail(&uri).await
} else {
self.check_website(&uri).await
@@ -262,7 +255,7 @@ where
Request: TryFrom,
ErrorKind: From,
{
- let client = ClientBuilder::default().build()?;
+ let client = ClientBuilder::builder().build().client()?;
Ok(client.check(request).await?)
}
@@ -345,9 +338,10 @@ mod test {
assert!(res.status().is_failure());
// Same, but ignore certificate error
- let res = ClientBuilder::default()
+ let res = ClientBuilder::builder()
.allow_insecure(true)
.build()
+ .client()
.unwrap()
.check("https://expired.badssl.com/")
.await
@@ -366,9 +360,10 @@ mod test {
// See https://github.com/rust-lang/crates.io/issues/788
let mut custom = HeaderMap::new();
custom.insert(header::ACCEPT, "text/html".parse().unwrap());
- let res = ClientBuilder::default()
+ let res = ClientBuilder::builder()
.custom_headers(custom)
.build()
+ .client()
.unwrap()
.check("https://crates.io/crates/lychee")
.await
@@ -387,9 +382,10 @@ mod test {
let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));
- let client = ClientBuilder::default()
+ let client = ClientBuilder::builder()
.timeout(checker_timeout)
.build()
+ .client()
.unwrap();
let res = client.check(mock_server.uri()).await.unwrap();
diff --git a/lychee-lib/src/filter/excludes.rs b/lychee-lib/src/filter/excludes.rs
index e4878607ee..9e5b19c7bb 100644
--- a/lychee-lib/src/filter/excludes.rs
+++ b/lychee-lib/src/filter/excludes.rs
@@ -1,76 +1,23 @@
use regex::RegexSet;
-use std::net::IpAddr;
-
-use crate::Uri;
-
-/// Pre-defined exclusions for known false-positives
-static FALSE_POSITIVE_PAT: &[&str] = &[r"http://www.w3.org/1999/xhtml"];
/// Exclude configuration for the link checker.
-/// You can ignore links based on regex patterns or pre-defined IP ranges.
-#[allow(clippy::struct_excessive_bools)]
+/// You can ignore links based on regex patterns.
#[derive(Clone, Debug)]
pub struct Excludes {
/// User-defined set of excluded regex patterns
- pub regex: Option,
- /// Example: 192.168.0.1
- pub private_ips: bool,
- /// Example: 169.254.0.0
- pub link_local_ips: bool,
- /// For IPv4: 127.0.0.1/8
- /// For IPv6: ::1/128
- pub loopback_ips: bool,
- /// Example: octocat@github.com
- pub mail: bool,
-}
-
-impl Default for Excludes {
- fn default() -> Self {
- Self {
- regex: None,
- private_ips: false,
- link_local_ips: false,
- loopback_ips: false,
- mail: false,
- }
- }
+ pub(crate) regex: RegexSet,
}
impl Excludes {
#[inline]
#[must_use]
- pub fn regex(&self, input: &str) -> bool {
- self.regex.as_ref().map_or(false, |re| re.is_match(input))
- }
-
- #[must_use]
- pub fn is_false_positive(input: &str) -> bool {
- input == FALSE_POSITIVE_PAT[0]
- }
-
- #[must_use]
- pub fn ip(&self, uri: &Uri) -> bool {
- match uri.host_ip() {
- Some(ip_addr) if self.loopback_ips && ip_addr.is_loopback() => true,
- // Note: in a pathological case, an IPv6 address can be IPv4-mapped
- // (IPv4 address embedded in a IPv6). We purposefully
- // don't deal with it here, and assume if an address is IPv6,
- // we shouldn't attempt to map it to IPv4.
- // See: https://tools.ietf.org/html/rfc4291#section-2.5.5.2
- Some(IpAddr::V4(v4_addr)) if self.private_ips && v4_addr.is_private() => true,
- Some(IpAddr::V4(v4_addr)) if self.link_local_ips && v4_addr.is_link_local() => true,
- _ => false,
- }
+ pub fn is_match(&self, input: &str) -> bool {
+ self.regex.is_match(input)
}
#[inline]
#[must_use]
- pub const fn is_mail_excluded(&self) -> bool {
- self.mail
- }
-
- #[inline]
pub fn is_empty(&self) -> bool {
- self.regex.as_ref().map_or(true, RegexSet::is_empty)
+ self.regex.is_empty()
}
}
diff --git a/lychee-lib/src/filter/includes.rs b/lychee-lib/src/filter/includes.rs
index 8a8ac7ce1b..d2c2698e13 100644
--- a/lychee-lib/src/filter/includes.rs
+++ b/lychee-lib/src/filter/includes.rs
@@ -2,20 +2,22 @@ use regex::RegexSet;
/// Include configuration for the link checker.
/// You can include links based on regex patterns
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug)]
pub struct Includes {
- pub regex: Option,
+ /// User-defined set of included regex patterns
+ pub regex: RegexSet,
}
impl Includes {
#[inline]
#[must_use]
- pub fn regex(&self, input: &str) -> bool {
- self.regex.as_ref().map_or(false, |re| re.is_match(input))
+ pub fn is_match(&self, input: &str) -> bool {
+ self.regex.is_match(input)
}
#[inline]
+ #[must_use]
pub fn is_empty(&self) -> bool {
- self.regex.as_ref().map_or(true, RegexSet::is_empty)
+ self.regex.is_empty()
}
}
diff --git a/lychee-lib/src/filter/mod.rs b/lychee-lib/src/filter/mod.rs
index a6fcb996e9..5d5b942004 100644
--- a/lychee-lib/src/filter/mod.rs
+++ b/lychee-lib/src/filter/mod.rs
@@ -1,74 +1,147 @@
mod excludes;
mod includes;
+use std::net::IpAddr;
+
pub use excludes::Excludes;
pub use includes::Includes;
use crate::uri::Uri;
+/// Pre-defined exclusions for known false-positives
+static FALSE_POSITIVE_PAT: &[&str] = &[r"http://www.w3.org/1999/xhtml"];
+
+#[inline]
+#[must_use]
+pub fn is_false_positive(input: &str) -> bool {
+ FALSE_POSITIVE_PAT.contains(&input) // exact match against any pre-defined pattern
+}
+
/// A generic URI filter
/// Used to decide if a given URI should be checked or skipped
+#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Debug, Default)]
pub struct Filter {
- pub(crate) includes: Includes,
- pub(crate) excludes: Excludes,
- pub(crate) scheme: Option,
+ pub includes: Option,
+ pub excludes: Option,
+ // TODO: accept multiple schemes
+ // TODO: includes scheme and excludes scheme
+ // TODO: exclude_mail should be merged into excludes scheme
+ // allowed scheme
+ pub scheme: Option,
+ /// Example: 192.168.0.1
+ pub exclude_private_ips: bool,
+ /// Example: 169.254.0.0
+ pub exclude_link_local_ips: bool,
+ /// For IPv4: 127.0.0.1/8
+ /// For IPv6: ::1/128
+ pub exclude_loopback_ips: bool,
+ /// Example: octocat@github.com
+ pub exclude_mail: bool,
}
impl Filter {
+ #[inline]
#[must_use]
- pub fn new(
- includes: Option,
- excludes: Option,
- scheme: Option,
- ) -> Self {
- Filter {
- includes: includes.unwrap_or_default(),
- excludes: excludes.unwrap_or_default(),
- scheme,
+ pub fn is_mail_excluded(&self, uri: &Uri) -> bool {
+ uri.is_mail() && self.exclude_mail
+ }
+
+ #[must_use]
+ pub fn is_ip_excluded(&self, uri: &Uri) -> bool {
+ match uri.host_ip() {
+ Some(ip_addr) if self.exclude_loopback_ips && ip_addr.is_loopback() => true,
+ // Note: in a pathological case, an IPv6 address can be IPv4-mapped
+ // (IPv4 address embedded in a IPv6). We purposefully
+ // don't deal with it here, and assume if an address is IPv6,
+ // we shouldn't attempt to map it to IPv4.
+ // See: https://tools.ietf.org/html/rfc4291#section-2.5.5.2
+ Some(IpAddr::V4(v4_addr)) if self.exclude_private_ips && v4_addr.is_private() => true,
+ Some(IpAddr::V4(v4_addr)) if self.exclude_link_local_ips && v4_addr.is_link_local() => {
+ true
+ }
+ _ => false,
}
}
+ #[inline]
+ #[must_use]
+ pub fn is_scheme_excluded(&self, uri: &Uri) -> bool {
+ self.scheme.as_deref().map_or(false, |allowed| allowed != uri.scheme()) // unset allows all
+ }
+
+ #[inline]
+ fn is_includes_empty(&self) -> bool {
+ self.includes.as_ref().map_or(true, Includes::is_empty) // unset counts as empty
+ }
+
+ #[inline]
+ fn is_excludes_empty(&self) -> bool {
+ self.excludes.as_ref().map_or(true, Excludes::is_empty) // unset counts as empty
+ }
+
+ #[inline]
+ fn is_includes_match(&self, input: &str) -> bool {
+ self.includes.as_ref().map_or(false, |rules| rules.is_match(input)) // unset never matches
+ }
+
+ #[inline]
+ fn is_excludes_match(&self, input: &str) -> bool {
+ self.excludes.as_ref().map_or(false, |rules| rules.is_match(input)) // unset never matches
+ }
+
+ /// Determine whether a given [`Uri`] should be excluded.
+ ///
+ /// # Details
+ ///
+ /// 1. If any of the following conditions are met, the URI is excluded:
+ /// - If it's a mail address and it's configured to ignore mail addresses.
+ /// - If the IP address belongs to a type that is configured to exclude.
+ /// - If the scheme of URI is not the allowed scheme.
+ /// 2. Decide whether the URI is *presumably included* or *explicitly included*:
+ /// - When both the exclude and include rules are empty, it's *presumably included* unless
+ /// it's a known false positive.
+ /// - When the include rules match the URI, it's *explicitly included*.
+ /// 3. When it's a known *false positive* pattern, it's *explicitly excluded*.
+ /// 4. Decide whether the URI is *presumably excluded* or *explicitly excluded*:
+ /// - When the exclude rules are empty, but the include rules don't match the URI, it's
+ /// *presumably excluded*.
+ /// - When the exclude rules match the URI, it's *explicitly excluded*.
+ /// 5. Otherwise, the URI is not excluded.
#[must_use]
pub fn is_excluded(&self, uri: &Uri) -> bool {
- // Skip mail?
- if self.excludes.is_mail_excluded() && uri.scheme() == "mailto" {
- return true;
- }
- // Skip specific IP address?
- if self.excludes.ip(&uri) {
+ // Skip mail address, specific IP, and scheme
+ if self.is_mail_excluded(uri) || self.is_ip_excluded(uri) || self.is_scheme_excluded(uri) {
return true;
}
let input = uri.as_str();
- if self.includes.is_empty() {
- if self.excludes.is_empty() {
- // No regex includes/excludes at all?
- // Not excluded unless it's a known false positive
- return Excludes::is_false_positive(input);
+
+ if self.is_includes_empty() {
+ if self.is_excludes_empty() {
+ // Both excludes and includes rules are empty:
+ // *Presumably included* unless it's false positive
+ return is_false_positive(input);
}
- } else if self.includes.regex(input) {
- // Included explicitly (Includes take precedence over excludes)
+ } else if self.is_includes_match(input) {
+ // *Explicitly included* (Includes take precedence over excludes)
return false;
}
- // Exclude well-known false-positives.
- // This is done after checking includes to allow for user-overwrites.
- if Excludes::is_false_positive(uri.as_str()) {
- return true;
- }
- if self.excludes.is_empty() {
- if !self.includes.is_empty() {
- // In case we have includes and no excludes,
- // skip everything that was not included
- return true;
- }
- } else if self.excludes.regex(input) {
- // Excluded explicitly
+
+ if is_false_positive(input)
+ // Exclude well-known false-positives
+ // Performed after checking includes to allow user-overwrites
+ || self.is_excludes_empty()
+ // Previous checks imply input is not explicitly included,
+ // if excludes rules is empty, then *presumably excluded*
+ || self.is_excludes_match(input)
+ // If excludes rules matches input, then
+ // *explicitly excluded*
+ {
return true;
}
- // URI scheme excluded?
- matches!(self.scheme, Some(ref scheme) if scheme != uri.scheme())
+ false
}
}
@@ -94,21 +167,22 @@ mod test {
const V4_LOOPBACK: &str = "http://127.0.0.1";
const V6_LOOPBACK: &str = "http://[::1]";
- const V4_LINK_LOCAL: &str = "http://169.254.0.1";
+ const V4_LINK_LOCAL_1: &str = "http://169.254.0.1";
+ const V4_LINK_LOCAL_2: &str = "http://169.254.10.1:8080";
// IPv4-Mapped IPv6 addresses (IPv4 embedded in IPv6)
const V6_MAPPED_V4_PRIVATE_CLASS_A: &str = "http://[::ffff:10.0.0.1]";
const V6_MAPPED_V4_LINK_LOCAL: &str = "http://[::ffff:169.254.0.1]";
macro_rules! assert_ip_address {
- (v4: $ip:expr, $predicate:tt) => {{
+ (v4: $ip:expr, $predicate:tt) => {
let res = if let Host::Ipv4(ipv4) = Url::parse($ip).map_err(|_| ())?.host().ok_or(())? {
ipv4.$predicate()
} else {
false
};
std::assert!(res);
- }};
+ };
(v6: $ip:expr, $predicate:tt) => {
let res = if let Host::Ipv6(ipv6) = Url::parse($ip).map_err(|_| ())?.host().ok_or(())? {
ipv6.$predicate()
@@ -129,7 +203,8 @@ mod test {
assert_ip_address!(v4: V4_LOOPBACK, is_loopback);
assert_ip_address!(v6: V6_LOOPBACK, is_loopback);
- assert_ip_address!(v4: V4_LINK_LOCAL, is_link_local);
+ assert_ip_address!(v4: V4_LINK_LOCAL_1, is_link_local);
+ assert_ip_address!(v4: V4_LINK_LOCAL_2, is_link_local);
Ok(())
}
@@ -154,10 +229,10 @@ mod test {
#[test]
fn test_overwrite_false_positives() {
let includes = Includes {
- regex: Some(RegexSet::new(&[r"http://www.w3.org/1999/xhtml"]).unwrap()),
+ regex: RegexSet::new(&[r"http://www.w3.org/1999/xhtml"]).unwrap(),
};
let filter = Filter {
- includes,
+ includes: Some(includes),
..Filter::default()
};
assert!(!filter.is_excluded(&website("http://www.w3.org/1999/xhtml")));
@@ -166,10 +241,10 @@ mod test {
#[test]
fn test_include_regex() {
let includes = Includes {
- regex: Some(RegexSet::new(&[r"foo.example.org"]).unwrap()),
+ regex: RegexSet::new(&[r"foo.example.org"]).unwrap(),
};
let filter = Filter {
- includes,
+ includes: Some(includes),
..Filter::default()
};
@@ -181,12 +256,8 @@ mod test {
#[test]
fn test_exclude_mail() {
- let excludes = Excludes {
- mail: true,
- ..Excludes::default()
- };
let filter = Filter {
- excludes,
+ exclude_mail: true,
..Filter::default()
};
@@ -198,13 +269,10 @@ mod test {
#[test]
fn test_exclude_regex() {
let excludes = Excludes {
- regex: Some(
- RegexSet::new(&[r"github.com", r"[a-z]+\.(org|net)", r"@example.org"]).unwrap(),
- ),
- ..Excludes::default()
+ regex: RegexSet::new(&[r"github.com", r"[a-z]+\.(org|net)", r"@example.org"]).unwrap(),
};
let filter = Filter {
- excludes,
+ excludes: Some(excludes),
..Filter::default()
};
@@ -218,15 +286,14 @@ mod test {
#[test]
fn test_exclude_include_regex() {
let includes = Includes {
- regex: Some(RegexSet::new(&[r"foo.example.org"]).unwrap()),
+ regex: RegexSet::new(&[r"foo.example.org"]).unwrap(),
};
let excludes = Excludes {
- regex: Some(RegexSet::new(&[r"example.org"]).unwrap()),
- ..Excludes::default()
+ regex: RegexSet::new(&[r"example.org"]).unwrap(),
};
let filter = Filter {
- includes,
- excludes,
+ includes: Some(includes),
+ excludes: Some(excludes),
..Filter::default()
};
@@ -244,7 +311,8 @@ mod test {
assert!(!filter.is_excluded(&website(V4_PRIVATE_CLASS_A)));
assert!(!filter.is_excluded(&website(V4_PRIVATE_CLASS_B)));
assert!(!filter.is_excluded(&website(V4_PRIVATE_CLASS_C)));
- assert!(!filter.is_excluded(&website(V4_LINK_LOCAL)));
+ assert!(!filter.is_excluded(&website(V4_LINK_LOCAL_1)));
+ assert!(!filter.is_excluded(&website(V4_LINK_LOCAL_2)));
assert!(!filter.is_excluded(&website(V4_LOOPBACK)));
assert!(!filter.is_excluded(&website(V6_LOOPBACK)));
}
@@ -252,10 +320,7 @@ mod test {
#[test]
fn test_exclude_private_ips() {
let filter = Filter {
- excludes: Excludes {
- private_ips: true,
- ..Excludes::default()
- },
+ exclude_private_ips: true,
..Filter::default()
};
@@ -267,23 +332,18 @@ mod test {
#[test]
fn test_exclude_link_local() {
let filter = Filter {
- excludes: Excludes {
- link_local_ips: true,
- ..Excludes::default()
- },
+ exclude_link_local_ips: true,
..Filter::default()
};
- assert!(filter.is_excluded(&website(V4_LINK_LOCAL)));
+ assert!(filter.is_excluded(&website(V4_LINK_LOCAL_1)));
+ assert!(filter.is_excluded(&website(V4_LINK_LOCAL_2)));
}
#[test]
fn test_exclude_loopback() {
let filter = Filter {
- excludes: Excludes {
- loopback_ips: true,
- ..Excludes::default()
- },
+ exclude_loopback_ips: true,
..Filter::default()
};
@@ -294,11 +354,8 @@ mod test {
#[test]
fn test_exclude_ip_v4_mapped_ip_v6_not_supported() {
let filter = Filter {
- excludes: Excludes {
- private_ips: true,
- link_local_ips: true,
- ..Excludes::default()
- },
+ exclude_private_ips: true,
+ exclude_link_local_ips: true,
..Filter::default()
};
diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs
index c665bb15cd..daa6043ce6 100644
--- a/lychee-lib/src/lib.rs
+++ b/lychee-lib/src/lib.rs
@@ -20,7 +20,7 @@
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
-//! let client = ClientBuilder::default().build()?;
+//! let client = ClientBuilder::default().client()?;
//! let response = client.check("https://github.com/lycheeverse/lychee").await?;
//! assert!(response.status().is_success());
//! Ok(())
@@ -63,6 +63,7 @@ use doc_comment as _; // required for doctest
use openssl_sys as _; // required for vendored-openssl feature
use ring as _; // required for apple silicon
+#[doc(inline)]
pub use crate::{
client::{check, ClientBuilder},
client_pool::ClientPool,
diff --git a/lychee-lib/src/test_utils.rs b/lychee-lib/src/test_utils.rs
index def682706a..2f2f78ceb2 100644
--- a/lychee-lib/src/test_utils.rs
+++ b/lychee-lib/src/test_utils.rs
@@ -21,7 +21,7 @@ where
ErrorKind: From,
{
ClientBuilder::default()
- .build()
+ .client()
.unwrap()
.check(request)
.await
diff --git a/lychee-lib/src/uri.rs b/lychee-lib/src/uri.rs
index 92e51ad14e..fa7d082775 100644
--- a/lychee-lib/src/uri.rs
+++ b/lychee-lib/src/uri.rs
@@ -56,10 +56,7 @@ impl Uri {
// TODO: Support GitLab etc.
pub(crate) fn extract_github(&self) -> Option<(&str, &str)> {
- debug_assert!(
- !matches!(self.scheme(), "mailto"),
- "Should only be called on a Website type!"
- );
+ debug_assert!(!self.is_mail(), "Should only be called on a Website type!");
// TODO: Support more patterns
if matches!(
@@ -74,6 +71,11 @@ impl Uri {
None
}
+
+ #[inline]
+ pub(crate) fn is_mail(&self) -> bool {
+ matches!(self.scheme(), "mailto") // a URI counts as mail when its scheme is `mailto`
+ }
}
impl AsRef for Uri {