mirror of
https://github.com/nushell/nushell
synced 2025-01-07 18:59:04 +00:00
c602b5a1e8
# Description `bytes starts-with` converts the input into a `Value` before running .starts_with to find if the binary matches. This has two side effects. First, it makes the code simpler: it only deals in whole values, which removes a lot of input pipeline handling and value transforming it would otherwise have to do, _especially_ in the presence of a cell path to drill into. Second, it buffers the entire input into memory, which can take up a lot of memory when dealing with large files, especially if you only want to check the first few bytes (like for a magic number). This PR adds a special branch on PipelineData::ExternalStream with a streaming version of starts_with. # User-Facing Changes Opening large files and running `bytes starts-with` on them will no longer take a long time. # Tests + Formatting Don't forget to add tests that cover your changes. Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A clippy::needless_collect` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass # Drawbacks Streaming checking is more complicated, and there may be bugs. I tested it with multiple chunks of string data and binary data, and it seems to work all right for inputs up to and beyond 8k bytes, though. The existing `operate` method still exists because the way it handles cell paths and values is complicated. This causes some "code duplication", or at least some duplication of intent, between the value code and the streaming code. This might be worthwhile considering the performance gains (approaching infinity on larger inputs). Another thing to consider is that my ExternalStream branch considers string data as valid input. The `operate` branch only parses Binary values, so it would fail on the same string input.
`open` is somewhat unpredictable about whether it returns string data or binary data, even when passing `--raw`. I think this can be a problem, but it is not one I'm trying to tackle in this PR; it is still worth considering.
111 lines
1.3 KiB
Rust
111 lines
1.3 KiB
Rust
mod alias;
|
|
mod all;
|
|
mod any;
|
|
mod append;
|
|
mod assignment;
|
|
mod break_;
|
|
mod bytes;
|
|
mod cal;
|
|
mod cd;
|
|
mod compact;
|
|
mod continue_;
|
|
mod cp;
|
|
mod date;
|
|
mod def;
|
|
mod default;
|
|
mod do_;
|
|
mod drop;
|
|
mod each;
|
|
mod echo;
|
|
mod empty;
|
|
mod enter;
|
|
mod error_make;
|
|
mod every;
|
|
#[cfg(not(windows))]
|
|
mod exec;
|
|
mod export_def;
|
|
mod fill;
|
|
mod find;
|
|
mod first;
|
|
mod flatten;
|
|
mod for_;
|
|
mod format;
|
|
mod g;
|
|
mod get;
|
|
mod glob;
|
|
mod group_by;
|
|
mod hash_;
|
|
mod headers;
|
|
mod help;
|
|
mod histogram;
|
|
mod insert;
|
|
mod into_filesize;
|
|
mod into_int;
|
|
mod last;
|
|
mod length;
|
|
mod let_;
|
|
mod lines;
|
|
mod loop_;
|
|
mod ls;
|
|
mod math;
|
|
mod merge;
|
|
mod mkdir;
|
|
mod move_;
|
|
mod mut_;
|
|
mod n;
|
|
mod network;
|
|
mod nu_check;
|
|
mod open;
|
|
mod p;
|
|
mod parse;
|
|
mod path;
|
|
mod platform;
|
|
mod prepend;
|
|
mod print;
|
|
#[cfg(feature = "sqlite")]
|
|
mod query;
|
|
mod random;
|
|
mod range;
|
|
mod redirection;
|
|
mod reduce;
|
|
mod reject;
|
|
mod rename;
|
|
mod return_;
|
|
mod reverse;
|
|
mod rm;
|
|
mod roll;
|
|
mod rotate;
|
|
mod run_external;
|
|
mod save;
|
|
mod select;
|
|
mod semicolon;
|
|
mod seq;
|
|
mod seq_char;
|
|
mod shells;
|
|
mod skip;
|
|
mod sort;
|
|
mod sort_by;
|
|
mod source_env;
|
|
mod split_by;
|
|
mod split_column;
|
|
mod split_row;
|
|
mod str_;
|
|
mod table;
|
|
mod take;
|
|
mod to_text;
|
|
mod touch;
|
|
mod transpose;
|
|
mod try_;
|
|
mod uniq;
|
|
mod uniq_by;
|
|
mod update;
|
|
mod upsert;
|
|
mod url;
|
|
mod use_;
|
|
mod where_;
|
|
#[cfg(feature = "which-support")]
|
|
mod which;
|
|
mod while_;
|
|
mod with_env;
|
|
mod wrap;
|
|
mod zip;
|