optimize hash md5 for binary input (#8860)

# Description

Fixes: #8260

# User-Facing Changes

`open bigfile | hash md5` no longer consumes excessive memory: external stream input is now fed to the hasher chunk by chunk as it arrives.

# Tests + Formatting

Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect` to check that you're using the standard code
style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- crates/nu-std/tests/run.nu` to run the tests for the
standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```

# After Submitting

If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
This commit is contained in:
WindSoilder 2023-04-14 01:29:06 +08:00 committed by GitHub
parent e892aad3f6
commit 017151dff1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2,10 +2,10 @@ use crate::input_handler::{operate, CmdArgument};
use nu_engine::CallExt;
use nu_protocol::ast::{Call, CellPath};
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::Span;
use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Type, Value,
};
use nu_protocol::{IntoPipelineData, Span};
use std::marker::PhantomData;
pub trait HashDigest: digest::Digest + Clone {
@ -88,13 +88,58 @@ where
let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 0)?;
let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
let args = Arguments { binary, cell_paths };
operate(
action::<D>,
args,
input,
call.head,
engine_state.ctrlc.clone(),
)
let mut hasher = D::new();
match input {
PipelineData::ExternalStream {
stdout: Some(stream),
span,
..
} => {
for item in stream {
match item {
// String and binary data are valid byte patterns
Ok(Value::String { val, .. }) => hasher.update(val.as_bytes()),
Ok(Value::Binary { val, .. }) => hasher.update(val),
// If any Error value is output, echo it back
Ok(v @ Value::Error { .. }) => return Ok(v.into_pipeline_data()),
// Unsupported data
Ok(other) => {
return Ok(Value::Error {
error: Box::new(ShellError::OnlySupportsThisInputType {
exp_input_type: "string and binary".into(),
wrong_type: other.get_type().to_string(),
dst_span: span,
src_span: other.expect_span(),
}),
}
.into_pipeline_data());
}
Err(err) => return Err(err),
};
}
let digest = hasher.finalize();
if args.binary {
Ok(Value::Binary {
val: digest.to_vec(),
span,
}
.into_pipeline_data())
} else {
Ok(Value::String {
val: format!("{digest:x}"),
span,
}
.into_pipeline_data())
}
}
_ => operate(
action::<D>,
args,
input,
call.head,
engine_state.ctrlc.clone(),
),
}
}
}