add str deunicode command (#13270)

# Description

Sometimes it's helpful to work with ASCII only. This command takes a
Unicode string as input and converts it to ASCII using the `deunicode`
crate.

```nushell
❯ "A…B" | str deunicode
A...B
```

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
Darren Schroeder 2024-06-29 16:12:34 -05:00 committed by GitHub
parent 40e629beb1
commit 33d0537cae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 111 additions and 1 deletions

9
Cargo.lock generated
View file

@ -377,7 +377,7 @@ dependencies = [
"bitflags 2.5.0", "bitflags 2.5.0",
"cexpr", "cexpr",
"clang-sys", "clang-sys",
"itertools 0.11.0", "itertools 0.12.1",
"lazy_static", "lazy_static",
"lazycell", "lazycell",
"proc-macro2", "proc-macro2",
@ -1148,6 +1148,12 @@ dependencies = [
"syn 1.0.109", "syn 1.0.109",
] ]
[[package]]
name = "deunicode"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "339544cc9e2c4dc3fc7149fd630c5f22263a4fdf18a98afd0075784968b5cf00"
[[package]] [[package]]
name = "dialoguer" name = "dialoguer"
version = "0.11.0" version = "0.11.0"
@ -2967,6 +2973,7 @@ dependencies = [
"chrono-tz 0.8.6", "chrono-tz 0.8.6",
"crossterm", "crossterm",
"csv", "csv",
"deunicode",
"dialoguer", "dialoguer",
"digest", "digest",
"dirs-next", "dirs-next",

View file

@ -80,6 +80,7 @@ crossbeam-channel = "0.5.8"
crossterm = "0.27" crossterm = "0.27"
csv = "1.3" csv = "1.3"
ctrlc = "3.4" ctrlc = "3.4"
deunicode = "1.6.0"
dialoguer = { default-features = false, version = "0.11" } dialoguer = { default-features = false, version = "0.11" }
digest = { default-features = false, version = "0.10" } digest = { default-features = false, version = "0.10" }
dirs-next = "2.0" dirs-next = "2.0"

View file

@ -42,6 +42,7 @@ chrono-humanize = { workspace = true }
chrono-tz = { workspace = true } chrono-tz = { workspace = true }
crossterm = { workspace = true } crossterm = { workspace = true }
csv = { workspace = true } csv = { workspace = true }
deunicode = { workspace = true }
dialoguer = { workspace = true, default-features = false, features = ["fuzzy-select"] } dialoguer = { workspace = true, default-features = false, features = ["fuzzy-select"] }
digest = { workspace = true, default-features = false } digest = { workspace = true, default-features = false }
dtparse = { workspace = true } dtparse = { workspace = true }

View file

@ -189,6 +189,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState {
Str, Str,
StrCapitalize, StrCapitalize,
StrContains, StrContains,
StrDeunicode,
StrDistance, StrDistance,
StrDowncase, StrDowncase,
StrEndswith, StrEndswith,

View file

@ -0,0 +1,98 @@
use deunicode::deunicode;
use nu_cmd_base::input_handler::{operate, CellPathOnlyArgs};
use nu_engine::command_prelude::*;
use nu_protocol::engine::StateWorkingSet;
/// Unit struct backing the `str deunicode` command; re-exported from the
/// parent module as `StrDeunicode`.
#[derive(Clone)]
pub struct SubCommand;
/// `Command` implementation for `str deunicode`.
impl Command for SubCommand {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "str deunicode"
    }

    /// Declares a string-in / string-out command in the `Strings` category.
    fn signature(&self) -> Signature {
        let io_types = vec![(Type::String, Type::String)];
        Signature::build("str deunicode")
            .input_output_types(io_types)
            .category(Category::Strings)
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Convert Unicode string to pure ASCII."
    }

    /// Extra keywords for command search.
    fn search_terms(&self) -> Vec<&str> {
        vec!["convert", "ascii"]
    }

    /// The command also provides `run_const`, so it reports itself as const.
    fn is_const(&self) -> bool {
        true
    }

    /// Runtime entry point: gathers the rest arguments (from position 0) as
    /// cell paths and hands them, together with `action`, to `operate`.
    ///
    /// NOTE(review): `signature` declares no rest parameter, so confirm
    /// whether any cell paths can actually reach this point.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let paths: Vec<CellPath> = call.rest(engine_state, stack, 0)?;
        let interrupt = engine_state.ctrlc.clone();
        operate(
            action,
            CellPathOnlyArgs::from(paths),
            input,
            call.head,
            interrupt,
        )
    }

    /// Const-evaluation entry point: same flow as `run`, but arguments are
    /// resolved through the working set and the interrupt flag is taken from
    /// its permanent state.
    fn run_const(
        &self,
        working_set: &StateWorkingSet,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let paths: Vec<CellPath> = call.rest_const(working_set, 0)?;
        let interrupt = working_set.permanent().ctrlc.clone();
        operate(
            action,
            CellPathOnlyArgs::from(paths),
            input,
            call.head,
            interrupt,
        )
    }

    /// Documented example; it is also exercised by this file's
    /// `test_examples` test.
    fn examples(&self) -> Vec<Example> {
        let ellipsis = Example {
            description: "deunicode a string",
            example: "'A…B' | str deunicode",
            result: Some(Value::test_string("A...B")),
        };
        vec![ellipsis]
    }
}
fn action(input: &Value, _args: &CellPathOnlyArgs, head: Span) -> Value {
match input {
Value::String { val, .. } => Value::string(deunicode(val), head),
Value::Error { .. } => input.clone(),
_ => Value::error(
ShellError::OnlySupportsThisInputType {
exp_input_type: "string".into(),
wrong_type: input.get_type().to_string(),
dst_span: head,
src_span: input.span(),
},
head,
),
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_examples;

    /// Runs every entry returned by `SubCommand::examples` through the
    /// crate's shared example checker.
    #[test]
    fn test_examples() {
        test_examples(SubCommand)
    }
}

View file

@ -1,5 +1,6 @@
mod case; mod case;
mod contains; mod contains;
mod deunicode;
mod distance; mod distance;
mod ends_with; mod ends_with;
mod expand; mod expand;
@ -15,6 +16,7 @@ mod trim;
pub use case::*; pub use case::*;
pub use contains::SubCommand as StrContains; pub use contains::SubCommand as StrContains;
pub use deunicode::SubCommand as StrDeunicode;
pub use distance::SubCommand as StrDistance; pub use distance::SubCommand as StrDistance;
pub use ends_with::SubCommand as StrEndswith; pub use ends_with::SubCommand as StrEndswith;
pub use expand::SubCommand as StrExpand; pub use expand::SubCommand as StrExpand;