From 712fec166d7fed06022c654b756c7c130215d5f0 Mon Sep 17 00:00:00 2001 From: Piepmatz Date: Fri, 23 Aug 2024 00:59:00 +0200 Subject: [PATCH] Improve working with `IntoValue` and `FromValue` for byte collections (#13641) # Description I was working with byte collections like `Vec` and [`bytes::Bytes`](https://docs.rs/bytes/1.7.1/bytes/struct.Bytes.html), both are currently not possible to be used directly in a struct that derives `IntoValue` and `FromValue` at the same time. The `Vec` will convert itself into a `Value::List` but expects a `Value::String` or `Value::Binary` to load from. I now also implemented that it can load from `Value::List` just like the other `Vec` versions. For further working with byte collections the type `bytes::Bytes` is wildly used, therefore I added a implementation for it. `bytes` is already part of the dependency graph as many crates (more than 5000 to crates.io) use it. # User-Facing Changes User of `nu-protocol` as library, e.g. plugin developers, can now use byte collections more easily in their data structures and derive `IntoValue` and `FromValue` for it. # Tests + Formatting I added a few tests that check that these byte collections are correctly translated in and from `Value`. They live in `test_derive.rs` as part of the `ByteContainer` and I also explicitely tested that `FromValue` for `Vec` works as expected. - :green_circle: `toolkit fmt` - :green_circle: `toolkit clippy` - :green_circle: `toolkit test` - :green_circle: `toolkit test stdlib` # After Submitting Maybe it should be explored if `Value::Binary` should use `bytes::Bytes` instead of `Vec`. --- Cargo.lock | 1 + Cargo.toml | 1 + crates/nu-protocol/Cargo.toml | 1 + crates/nu-protocol/src/value/from_value.rs | 116 ++++++++++++++++---- crates/nu-protocol/src/value/into_value.rs | 8 ++ crates/nu-protocol/src/value/test_derive.rs | 54 +++++++++ 6 files changed, 162 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd631a7e56..54d80b1449 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3364,6 +3364,7 @@ version = "0.97.2" dependencies = [ "brotli", "byte-unit", + "bytes", "chrono", "chrono-humanize", "convert_case", diff --git a/Cargo.toml b/Cargo.toml index 1350a8d217..aa3eb60e45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,7 @@ base64 = "0.22.1" bracoxide = "0.1.2" brotli = "5.0" byteorder = "1.5" +bytes = "1" bytesize = "1.3" calamine = "0.24.0" chardetng = "0.1.17" diff --git a/crates/nu-protocol/Cargo.toml b/crates/nu-protocol/Cargo.toml index e48f96a213..5a3829bbfe 100644 --- a/crates/nu-protocol/Cargo.toml +++ b/crates/nu-protocol/Cargo.toml @@ -19,6 +19,7 @@ nu-system = { path = "../nu-system", version = "0.97.2" } nu-derive-value = { path = "../nu-derive-value", version = "0.97.2" } brotli = { workspace = true, optional = true } +bytes = { workspace = true } byte-unit = { version = "5.1", features = [ "serde" ] } chrono = { workspace = true, features = [ "serde", "std", "unstable-locales" ], default-features = false } chrono-humanize = { workspace = true } diff --git a/crates/nu-protocol/src/value/from_value.rs b/crates/nu-protocol/src/value/from_value.rs index 1c3bc7dfd4..d021b0b3a8 100644 --- a/crates/nu-protocol/src/value/from_value.rs +++ b/crates/nu-protocol/src/value/from_value.rs @@ -1,13 +1,14 @@ use crate::{ ast::{CellPath, PathMember}, engine::Closure, - NuGlob, Range, Record, ShellError, Spanned, Type, Value, + NuGlob, Range, Record, ShellError, Span, Spanned, Type, Value, }; use chrono::{DateTime, FixedOffset}; use std::{ any, cmp::Ordering, collections::{HashMap, VecDeque}, + fmt, path::PathBuf, str::FromStr, }; @@ -221,20 +222,8 @@ macro_rules! impl_from_value_for_int { "int should be within the valid range for {}", stringify!($type) ), - i64::MIN..=MIN => ShellError::GenericError { - error: "Integer too small".to_string(), - msg: format!("{int} is smaller than {}", <$type>::MIN), - span: Some(span), - help: None, - inner: vec![], - }, - MAX..=i64::MAX => ShellError::GenericError { - error: "Integer too large".to_string(), - msg: format!("{int} is larger than {}", <$type>::MAX), - span: Some(span), - help: None, - inner: vec![], - }, + i64::MIN..=MIN => int_too_small_error(int, <$type>::MIN, span), + MAX..=i64::MAX => int_too_large_error(int, <$type>::MAX, span), }) } @@ -417,14 +406,31 @@ impl FromValue for String { } } -// This impl is different from Vec as it reads from Value::Binary and -// Value::String instead of Value::List. -// This also denies implementing FromValue for u8. +// This impl is different from Vec as it allows reading from Value::Binary and Value::String too. +// This also denies implementing FromValue for u8 as it would be in conflict with the Vec impl. impl FromValue for Vec { fn from_value(v: Value) -> Result { match v { Value::Binary { val, .. } => Ok(val), Value::String { val, .. } => Ok(val.into_bytes()), + Value::List { vals, .. } => { + const U8MIN: i64 = u8::MIN as i64; + const U8MAX: i64 = u8::MAX as i64; + let mut this = Vec::with_capacity(vals.len()); + for val in vals { + let span = val.span(); + let int = i64::from_value(val)?; + // calculating -1 on these ranges would be less readable + #[allow(overlapping_range_endpoints)] + #[allow(clippy::match_overlapping_arm)] + match int { + U8MIN..=U8MAX => this.push(int as u8), + i64::MIN..=U8MIN => return Err(int_too_small_error(int, U8MIN, span)), + U8MAX..=i64::MAX => return Err(int_too_large_error(int, U8MAX, span)), + }; + } + Ok(this) + } v => Err(ShellError::CantConvert { to_type: Self::expected_type().to_string(), from_type: v.get_type().to_string(), @@ -664,9 +670,51 @@ where } } +// Foreign Types + +impl FromValue for bytes::Bytes { + fn from_value(v: Value) -> Result { + match v { + Value::Binary { val, .. } => Ok(val.into()), + v => Err(ShellError::CantConvert { + to_type: Self::expected_type().to_string(), + from_type: v.get_type().to_string(), + span: v.span(), + help: None, + }), + } + } + + fn expected_type() -> Type { + Type::Binary + } +} + +// Use generics with `fmt::Display` to allow passing different kinds of integer +fn int_too_small_error(int: impl fmt::Display, min: impl fmt::Display, span: Span) -> ShellError { + ShellError::GenericError { + error: "Integer too small".to_string(), + msg: format!("{int} is smaller than {min}"), + span: Some(span), + help: None, + inner: vec![], + } +} + +fn int_too_large_error(int: impl fmt::Display, max: impl fmt::Display, span: Span) -> ShellError { + ShellError::GenericError { + error: "Integer too large".to_string(), + msg: format!("{int} is larger than {max}"), + span: Some(span), + help: None, + inner: vec![], + } +} + #[cfg(test)] mod tests { - use crate::{engine::Closure, FromValue, Record, Type}; + use crate::{engine::Closure, FromValue, IntoValue, Record, Span, Type, Value}; + use std::ops::Deref; #[test] fn expected_type_default_impl() { @@ -680,4 +728,34 @@ mod tests { Type::Custom("Closure".to_string().into_boxed_str()) ); } + + #[test] + fn from_value_vec_u8() { + let vec: Vec = vec![1, 2, 3]; + let span = Span::test_data(); + let string = "Hello Vec!".to_string(); + + assert_eq!( + Vec::::from_value(vec.clone().into_value(span)).unwrap(), + vec.clone(), + "Vec roundtrip" + ); + + assert_eq!( + Vec::::from_value(Value::test_string(string.clone())) + .unwrap() + .deref(), + string.as_bytes(), + "Vec from String" + ); + + assert_eq!( + Vec::::from_value(Value::test_binary(vec.clone())).unwrap(), + vec, + "Vec from Binary" + ); + + assert!(Vec::::from_value(vec![u8::MIN as i32 - 1].into_value(span)).is_err()); + assert!(Vec::::from_value(vec![u8::MAX as i32 + 1].into_value(span)).is_err()); + } } diff --git a/crates/nu-protocol/src/value/into_value.rs b/crates/nu-protocol/src/value/into_value.rs index c27b19b651..5730f9d8da 100644 --- a/crates/nu-protocol/src/value/into_value.rs +++ b/crates/nu-protocol/src/value/into_value.rs @@ -172,6 +172,14 @@ impl IntoValue for Value { } } +// Foreign Types + +impl IntoValue for bytes::Bytes { + fn into_value(self, span: Span) -> Value { + Value::binary(self.to_vec(), span) + } +} + // TODO: use this type for all the `into_value` methods that types implement but return a Result /// A trait for trying to convert a value into a `Value`. /// diff --git a/crates/nu-protocol/src/value/test_derive.rs b/crates/nu-protocol/src/value/test_derive.rs index 56bccabd2a..263d5f3ed4 100644 --- a/crates/nu-protocol/src/value/test_derive.rs +++ b/crates/nu-protocol/src/value/test_derive.rs @@ -1,4 +1,5 @@ use crate::{record, FromValue, IntoValue, Record, Span, Value}; +use bytes::Bytes; use std::collections::HashMap; // Make nu_protocol available in this namespace, consumers of this crate will @@ -440,3 +441,56 @@ enum_rename_all! { Flat: "flatcase" => ["alphaone", "betatwo", "charliethree"], UpperFlat: "UPPERFLATCASE" => ["ALPHAONE", "BETATWO", "CHARLIETHREE"] } + +#[derive(IntoValue, FromValue, Debug, PartialEq)] +struct ByteContainer { + vec: Vec, + bytes: Bytes, +} + +impl ByteContainer { + fn make() -> Self { + ByteContainer { + vec: vec![1, 2, 3], + bytes: Bytes::from_static(&[4, 5, 6]), + } + } + + fn value() -> Value { + Value::test_record(record! { + "vec" => Value::test_list(vec![ + Value::test_int(1), + Value::test_int(2), + Value::test_int(3), + ]), + "bytes" => Value::test_binary(vec![4, 5, 6]), + }) + } +} + +#[test] +fn bytes_into_value() { + let expected = ByteContainer::value(); + let actual = ByteContainer::make().into_test_value(); + assert_eq!(expected, actual); +} + +#[test] +fn bytes_from_value() { + let expected = ByteContainer::make(); + let actual = ByteContainer::from_value(ByteContainer::value()).unwrap(); + assert_eq!(expected, actual); +} + +#[test] +fn bytes_roundtrip() { + let expected = ByteContainer::make(); + let actual = ByteContainer::from_value(ByteContainer::make().into_test_value()).unwrap(); + assert_eq!(expected, actual); + + let expected = ByteContainer::value(); + let actual = ByteContainer::from_value(ByteContainer::value()) + .unwrap() + .into_test_value(); + assert_eq!(expected, actual); +}