Auto merge of #11116 - y21:format_collect, r=Manishearth

new lint: `format_collect`

A perf lint that looks for `format!`ing inside of `map`, then collecting it into a `String`. Did a quick benchmark locally and it's a bit more than 2x faster with fold.
`write!` is still not optimal (presumably because the fmt stuff goes through dynamic dispatch), but it's still a lot better than creating a new string on every element.
I thought about making a machine applicable suggestion, but there's a lot of suggestions that need to be made here, so I decided to just add help messages.

changelog: new lint: `format_collect`
This commit is contained in:
bors 2023-07-17 15:38:33 +00:00
commit 410456da51
7 changed files with 168 additions and 3 deletions

View file

@ -4865,6 +4865,7 @@ Released 2018-09-13
[`forget_copy`]: https://rust-lang.github.io/rust-clippy/master/index.html#forget_copy
[`forget_non_drop`]: https://rust-lang.github.io/rust-clippy/master/index.html#forget_non_drop
[`forget_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#forget_ref
[`format_collect`]: https://rust-lang.github.io/rust-clippy/master/index.html#format_collect
[`format_in_format_args`]: https://rust-lang.github.io/rust-clippy/master/index.html#format_in_format_args
[`format_push_string`]: https://rust-lang.github.io/rust-clippy/master/index.html#format_push_string
[`from_iter_instead_of_collect`]: https://rust-lang.github.io/rust-clippy/master/index.html#from_iter_instead_of_collect

View file

@ -338,6 +338,7 @@ pub(crate) static LINTS: &[&crate::LintInfo] = &[
crate::methods::FILTER_NEXT_INFO,
crate::methods::FLAT_MAP_IDENTITY_INFO,
crate::methods::FLAT_MAP_OPTION_INFO,
crate::methods::FORMAT_COLLECT_INFO,
crate::methods::FROM_ITER_INSTEAD_OF_COLLECT_INFO,
crate::methods::GET_FIRST_INFO,
crate::methods::GET_LAST_WITH_LEN_INFO,

View file

@ -0,0 +1,33 @@
use super::FORMAT_COLLECT;
use clippy_utils::diagnostics::span_lint_and_then;
use clippy_utils::macros::{is_format_macro, root_macro_call_first_node};
use clippy_utils::ty::is_type_lang_item;
use rustc_hir::{Expr, ExprKind, LangItem};
use rustc_lint::LateContext;
use rustc_span::Span;
/// Same as `peel_blocks` but only actually considers blocks that are not from an expansion.
/// This is needed because always calling `peel_blocks` would otherwise remove parts of the
/// `format!` macro, which would cause `root_macro_call_first_node` to return `None`.
fn peel_non_expn_blocks<'tcx>(expr: &'tcx Expr<'tcx>) -> Option<&'tcx Expr<'tcx>> {
match expr.kind {
ExprKind::Block(block, _) if !expr.span.from_expansion() => peel_non_expn_blocks(block.expr?),
_ => Some(expr),
}
}
pub(super) fn check(cx: &LateContext<'_>, expr: &Expr<'_>, map_arg: &Expr<'_>, map_span: Span) {
if is_type_lang_item(cx, cx.typeck_results().expr_ty(expr), LangItem::String)
&& let ExprKind::Closure(closure) = map_arg.kind
&& let body = cx.tcx.hir().body(closure.body)
&& let Some(value) = peel_non_expn_blocks(body.value)
&& let Some(mac) = root_macro_call_first_node(cx, value)
&& is_format_macro(cx, mac.def_id)
{
span_lint_and_then(cx, FORMAT_COLLECT, expr.span, "use of `format!` to build up a string from an iterator", |diag| {
diag.span_help(map_span, "call `fold` instead")
.span_help(value.span.source_callsite(), "... and use the `write!` macro here")
.note("this can be written more efficiently by appending to a `String` directly");
});
}
}

View file

@ -26,6 +26,7 @@ mod filter_map_next;
mod filter_next;
mod flat_map_identity;
mod flat_map_option;
mod format_collect;
mod from_iter_instead_of_collect;
mod get_first;
mod get_last_with_len;
@ -3378,6 +3379,41 @@ declare_clippy_lint! {
"calling `Stdin::read_line`, then trying to parse it without first trimming"
}
declare_clippy_lint! {
/// ### What it does
/// Checks for usage of `.map(|_| format!(..)).collect::<String>()`.
///
/// ### Why is this bad?
/// This allocates a new string for every element in the iterator.
/// This can be done more efficiently by creating the `String` once and appending to it in `Iterator::fold`,
/// using either the `write!` macro which supports exactly the same syntax as the `format!` macro,
/// or concatenating with `+` in case the iterator yields `&str`/`String`.
///
/// Note also that `write!`-ing into a `String` can never fail, despite the return type of `write!` being `std::fmt::Result`,
/// so it can be safely ignored or unwrapped.
///
/// ### Example
/// ```rust
/// fn hex_encode(bytes: &[u8]) -> String {
/// bytes.iter().map(|b| format!("{b:02X}")).collect()
/// }
/// ```
/// Use instead:
/// ```rust
/// use std::fmt::Write;
/// fn hex_encode(bytes: &[u8]) -> String {
/// bytes.iter().fold(String::new(), |mut output, b| {
/// let _ = write!(output, "{b:02X}");
/// output
/// })
/// }
/// ```
#[clippy::version = "1.72.0"]
pub FORMAT_COLLECT,
perf,
"`format!`ing every element in a collection, then collecting the strings into a new `String`"
}
pub struct Methods {
avoid_breaking_exported_api: bool,
msrv: Msrv,
@ -3512,6 +3548,7 @@ impl_lint_pass!(Methods => [
UNNECESSARY_LITERAL_UNWRAP,
DRAIN_COLLECT,
MANUAL_TRY_FOLD,
FORMAT_COLLECT,
]);
/// Extracts a method call name, args, and `Span` of the method name.
@ -3733,8 +3770,9 @@ impl Methods {
Some((name @ ("cloned" | "copied"), recv2, [], _, _)) => {
iter_cloned_collect::check(cx, name, expr, recv2);
},
Some(("map", m_recv, [m_arg], _, _)) => {
Some(("map", m_recv, [m_arg], m_ident_span, _)) => {
map_collect_result_unit::check(cx, expr, m_recv, m_arg);
format_collect::check(cx, expr, m_arg, m_ident_span);
},
Some(("take", take_self_arg, [take_arg], _, _)) => {
if self.msrv.meets(msrvs::STR_REPEAT) {

View file

@ -132,8 +132,7 @@ impl ClippyCmd {
let clippy_args: String = self
.clippy_args
.iter()
.map(|arg| format!("{arg}__CLIPPY_HACKERY__"))
.collect();
.fold(String::new(), |s, arg| s + arg + "__CLIPPY_HACKERY__");
// Currently, `CLIPPY_TERMINAL_WIDTH` is used only to format "unknown field" error messages.
let terminal_width = termize::dimensions().map_or(0, |(w, _)| w);

View file

@ -0,0 +1,31 @@
#![allow(unused, dead_code)]
#![warn(clippy::format_collect)]
fn hex_encode(bytes: &[u8]) -> String {
bytes.iter().map(|b| format!("{b:02X}")).collect()
}
#[rustfmt::skip]
fn hex_encode_deep(bytes: &[u8]) -> String {
bytes.iter().map(|b| {{{{{ format!("{b:02X}") }}}}}).collect()
}
macro_rules! fmt {
($x:ident) => {
format!("{x:02X}", x = $x)
};
}
fn from_macro(bytes: &[u8]) -> String {
bytes.iter().map(|x| fmt!(x)).collect()
}
fn with_block() -> String {
(1..10)
.map(|s| {
let y = 1;
format!("{s} {y}")
})
.collect()
}
fn main() {}

View file

@ -0,0 +1,62 @@
error: use of `format!` to build up a string from an iterator
--> $DIR/format_collect.rs:5:5
|
LL | bytes.iter().map(|b| format!("{b:02X}")).collect()
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
help: call `fold` instead
--> $DIR/format_collect.rs:5:18
|
LL | bytes.iter().map(|b| format!("{b:02X}")).collect()
| ^^^
help: ... and use the `write!` macro here
--> $DIR/format_collect.rs:5:26
|
LL | bytes.iter().map(|b| format!("{b:02X}")).collect()
| ^^^^^^^^^^^^^^^^^^
= note: this can be written more efficiently by appending to a `String` directly
= note: `-D clippy::format-collect` implied by `-D warnings`
error: use of `format!` to build up a string from an iterator
--> $DIR/format_collect.rs:10:5
|
LL | bytes.iter().map(|b| {{{{{ format!("{b:02X}") }}}}}).collect()
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
help: call `fold` instead
--> $DIR/format_collect.rs:10:18
|
LL | bytes.iter().map(|b| {{{{{ format!("{b:02X}") }}}}}).collect()
| ^^^
help: ... and use the `write!` macro here
--> $DIR/format_collect.rs:10:32
|
LL | bytes.iter().map(|b| {{{{{ format!("{b:02X}") }}}}}).collect()
| ^^^^^^^^^^^^^^^^^^
= note: this can be written more efficiently by appending to a `String` directly
error: use of `format!` to build up a string from an iterator
--> $DIR/format_collect.rs:24:5
|
LL | / (1..10)
LL | | .map(|s| {
LL | | let y = 1;
LL | | format!("{s} {y}")
LL | | })
LL | | .collect()
| |__________________^
|
help: call `fold` instead
--> $DIR/format_collect.rs:25:10
|
LL | .map(|s| {
| ^^^
help: ... and use the `write!` macro here
--> $DIR/format_collect.rs:27:13
|
LL | format!("{s} {y}")
| ^^^^^^^^^^^^^^^^^^
= note: this can be written more efficiently by appending to a `String` directly
error: aborting due to 3 previous errors