corrected missing shellerror type (#439)

This commit is contained in:
Fernando Herrera 2021-12-05 13:25:37 +00:00 committed by GitHub
parent 22469a9cb1
commit 29efbee285
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 723 additions and 607 deletions

35
Cargo.lock generated
View file

@ -120,11 +120,10 @@ dependencies = [
[[package]]
name = "arrow2"
version = "0.7.0"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d873e2775c3d87a4e8d77aa544cbd43f34a0779d5164c59e7c6a1dd0678eb395"
checksum = "d3452b2ae9727464a31a726c07ffec0c0da3b87831610d9ac99fc691c78b3a44"
dependencies = [
"ahash",
"arrow-format",
"base64",
"chrono",
@ -628,9 +627,9 @@ dependencies = [
[[package]]
name = "dirs"
version = "3.0.2"
version = "4.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309"
checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059"
dependencies = [
"dirs-sys",
]
@ -1823,9 +1822,9 @@ dependencies = [
[[package]]
name = "parquet2"
version = "0.6.0"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db82df54cdd88931d29b850190915b9069bb93fba8e1aefc0d59d8ca81603d6d"
checksum = "41051fae4c0fab9040e291b360c6c8037d09d482aa83e94e37f3d080a32a58c3"
dependencies = [
"async-stream",
"bitpacking",
@ -1916,9 +1915,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "polars"
version = "0.17.0"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c94a25d46e93b64eac7848c028a545dc08fa01e148e4942c5442b3843c3a598"
checksum = "3e9211d1bb8d2d81541e4ab80ce9148a8e2a987d6412c2a48017fbbe24231ea1"
dependencies = [
"polars-core",
"polars-io",
@ -1927,9 +1926,9 @@ dependencies = [
[[package]]
name = "polars-arrow"
version = "0.17.0"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cc4488d2f2d6b901bb6e5728e58966013a272cae48861070b676215a79b4a99"
checksum = "fa5ee9c385bf6643893f98efa80ff5a07169b50f65962c7843c0a13e12f0b0cf"
dependencies = [
"arrow2",
"num 0.4.0",
@ -1938,9 +1937,9 @@ dependencies = [
[[package]]
name = "polars-core"
version = "0.17.0"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6771524063d742a08163d96875ca5df71dff7113f27da58db5ec5fa164165bf6"
checksum = "3cb1de44e479ce2764a7a3ad057e16f434efa334feb993284e1a48bb8888c6d1"
dependencies = [
"ahash",
"anyhow",
@ -1963,15 +1962,15 @@ dependencies = [
[[package]]
name = "polars-io"
version = "0.17.0"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11a5f5f51525043ee7befd49e586e6919345237826a5f17b53956f8242100957"
checksum = "8bcb74f52ee9ff84863ae01de6ba25db092a9880302db4bf8f351f65b3ff0d12"
dependencies = [
"ahash",
"anyhow",
"arrow2",
"csv-core",
"dirs 3.0.2",
"dirs 4.0.0",
"lazy_static",
"lexical",
"memchr",
@ -1987,9 +1986,9 @@ dependencies = [
[[package]]
name = "polars-lazy"
version = "0.17.0"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da3ea647e2fa59d1bbbf90929c5d10ef6a9018aac256d1c6d0e8248211804b61"
checksum = "43f91022ba6463df71ad6eb80ac2307884578d9959e85e1fe9dac18988291d46"
dependencies = [
"ahash",
"itertools",

View file

@ -54,7 +54,7 @@ crossterm = "0.22.1"
num = {version="0.4.0", optional=true}
[dependencies.polars]
version = "0.17.0"
version = "0.18.0"
optional = true
features = ["default", "parquet", "json", "serde", "object", "checked_arithmetic", "strings"]

View file

@ -1,3 +1,3 @@
# nu-dataframe
The nu-dataframe crate holds the definitions of the dataframe structure
The nu-dataframe crate holds the definitions of the dataframe structures and commands

View file

@ -1,3 +1,3 @@
mod nu_dataframe;
pub use nu_dataframe::commands::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame};
pub use nu_dataframe::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame};

View file

@ -1,12 +0,0 @@
mod describe;
mod dtypes;
mod open;
mod to_df;
pub use describe::DescribeDF;
pub use dtypes::DataTypes;
pub use open::OpenDataFrame;
pub use to_df::ToDataFrame;
#[cfg(test)]
mod test_dataframe;

View file

@ -1,6 +1,4 @@
use crate::dataframe::nu_dataframe::Column;
use super::super::NuDataFrame;
use super::nu_dataframe::{Column, NuDataFrame};
use nu_protocol::{
ast::Call,
@ -19,7 +17,7 @@ pub struct DescribeDF;
impl Command for DescribeDF {
fn name(&self) -> &str {
"describe"
"describe-df"
}
fn usage(&self) -> &str {
@ -27,13 +25,13 @@ impl Command for DescribeDF {
}
fn signature(&self) -> Signature {
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "dataframe description",
example: "[[a b]; [1 1] [1 1]] | to df | describe",
example: "[[a b]; [1 1] [1 1]] | to df | describe-df",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
@ -134,13 +132,14 @@ fn command(
.map(|col| {
let count = col.len() as f64;
let sum = match col.sum_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
let sum = col
.sum_as_series()
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
},
Err(_) => None,
};
});
let mean = match col.mean_as_series().get(0) {
AnyValue::Float64(v) => Some(v),
@ -157,54 +156,50 @@ fn command(
_ => None,
};
let min = match col.min_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
let min = col
.min_as_series()
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
},
Err(_) => None,
};
});
let q_25 = match col.quantile_as_series(0.25) {
Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
},
Err(_) => None,
},
Err(_) => None,
};
let q_50 = match col.quantile_as_series(0.50) {
Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
},
Err(_) => None,
},
Err(_) => None,
};
let q_75 = match col.quantile_as_series(0.75) {
Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
},
Err(_) => None,
},
Err(_) => None,
};
let max = match col.max_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
let q_25 = col
.quantile_as_series(0.25)
.ok()
.and_then(|ca| ca.cast(&DataType::Float64).ok())
.and_then(|ca| match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
},
Err(_) => None,
};
});
let q_50 = col
.quantile_as_series(0.50)
.ok()
.and_then(|ca| ca.cast(&DataType::Float64).ok())
.and_then(|ca| match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
});
let q_75 = col
.quantile_as_series(0.75)
.ok()
.and_then(|ca| ca.cast(&DataType::Float64).ok())
.and_then(|ca| match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
});
let max = col
.max_as_series()
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
});
let name = format!("{} ({})", col.name(), col.dtype());
ChunkedArray::<Float64Type>::new_from_opt_slice(
@ -226,12 +221,12 @@ fn command(
});
let res = head.chain(tail).collect::<Vec<Series>>();
let df = DataFrame::new(res).map_err(|e| {
ShellError::LabeledError("Dataframe Error".into(), e.to_string(), call.head)
})?;
Ok(PipelineData::Value(NuDataFrame::dataframe_into_value(
df, call.head,
)))
DataFrame::new(res)
.map_err(|e| {
ShellError::SpannedLabeledError("Dataframe Error".into(), e.to_string(), call.head)
})
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}
#[cfg(test)]

View file

@ -1,4 +1,4 @@
use super::super::{Column, NuDataFrame};
use super::nu_dataframe::{Column, NuDataFrame};
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
@ -18,12 +18,12 @@ impl Command for DataTypes {
}
fn signature(&self) -> Signature {
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "drop column a",
description: "Dataframe dtypes",
example: "[[a b]; [1 2] [3 4]] | to df | dtypes",
result: Some(
NuDataFrame::try_from_columns(vec![
@ -90,8 +90,8 @@ fn command(
let names_col = Column::new("column".to_string(), names);
let dtypes_col = Column::new("dtype".to_string(), dtypes);
let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col])?;
Ok(PipelineData::Value(df.into_value(call.head)))
NuDataFrame::try_from_columns(vec![names_col, dtypes_col])
.map(|df| PipelineData::Value(df.into_value(call.head), None))
}
#[cfg(test)]

View file

@ -1,390 +1,14 @@
pub mod commands;
mod nu_dataframe;
mod between_values;
mod conversion;
mod custom_value;
mod operations;
mod describe;
mod dtypes;
mod open;
mod to_df;
use conversion::{Column, ColumnMap};
pub use describe::DescribeDF;
pub use dtypes::DataTypes;
pub use open::OpenDataFrame;
pub use to_df::ToDataFrame;
use indexmap::map::IndexMap;
use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value};
use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
// DataFrameValue is an encapsulation of Nushell Value that can be used
// to define the PolarsObject Trait. The polars object trait allows to
// create dataframes with mixed datatypes
#[derive(Clone, Debug)]
pub struct DataFrameValue(Value);
impl DataFrameValue {
fn new(value: Value) -> Self {
Self(value)
}
fn get_value(&self) -> Value {
self.0.clone()
}
}
impl Display for DataFrameValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0.get_type())
}
}
impl Default for DataFrameValue {
fn default() -> Self {
Self(Value::Nothing {
span: Span::unknown(),
})
}
}
impl PartialEq for DataFrameValue {
fn eq(&self, other: &Self) -> bool {
self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq)
}
}
impl Eq for DataFrameValue {}
impl std::hash::Hash for DataFrameValue {
fn hash<H: Hasher>(&self, state: &mut H) {
match &self.0 {
Value::Nothing { .. } => 0.hash(state),
Value::Int { val, .. } => val.hash(state),
Value::String { val, .. } => val.hash(state),
// TODO. Define hash for the rest of types
_ => {}
}
}
}
impl PolarsObject for DataFrameValue {
fn type_name() -> &'static str {
"object"
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct NuDataFrame(DataFrame);
impl AsRef<DataFrame> for NuDataFrame {
fn as_ref(&self) -> &polars::prelude::DataFrame {
&self.0
}
}
impl AsMut<DataFrame> for NuDataFrame {
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
&mut self.0
}
}
impl NuDataFrame {
pub fn new(dataframe: DataFrame) -> Self {
Self(dataframe)
}
fn default_value(span: Span) -> Value {
let dataframe = DataFrame::default();
NuDataFrame::dataframe_into_value(dataframe, span)
}
pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value {
Value::CustomValue {
val: Box::new(Self::new(dataframe)),
span,
}
}
pub fn into_value(self, span: Span) -> Value {
Value::CustomValue {
val: Box::new(self),
span,
}
}
pub fn series_to_value(series: Series, span: Span) -> Result<Value, ShellError> {
match DataFrame::new(vec![series]) {
Ok(dataframe) => Ok(NuDataFrame::dataframe_into_value(dataframe, span)),
Err(e) => Err(ShellError::InternalError(e.to_string())),
}
}
pub fn try_from_iter<T>(iter: T) -> Result<Self, ShellError>
where
T: Iterator<Item = Value>,
{
// Dictionary to store the columnar data extracted from
// the input. During the iteration we check if the values
// have different type
let mut column_values: ColumnMap = IndexMap::new();
for value in iter {
match value {
Value::List { vals, .. } => {
let cols = (0..vals.len())
.map(|i| format!("{}", i))
.collect::<Vec<String>>();
conversion::insert_record(&mut column_values, &cols, &vals)?
}
Value::Record { cols, vals, .. } => {
conversion::insert_record(&mut column_values, &cols, &vals)?
}
_ => {
let key = "0".to_string();
conversion::insert_value(value, key, &mut column_values)?
}
}
}
conversion::from_parsed_columns(column_values)
}
//pub fn try_from_series(columns: Vec<Series>) -> Result<Self, ShellError> {
// let dataframe = DataFrame::new(columns)
// .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?;
// Ok(Self::new(dataframe))
//}
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
let mut column_values: ColumnMap = IndexMap::new();
for column in columns {
let name = column.name().to_string();
for value in column {
conversion::insert_value(value, name.clone(), &mut column_values)?;
}
}
conversion::from_parsed_columns(column_values)
}
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
match input.into_value(span) {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuDataFrame>() {
Some(df) => Ok(NuDataFrame(df.0.clone())),
None => Err(ShellError::CantConvert(
"Dataframe not found".into(),
"value is not a dataframe".into(),
span,
)),
},
_ => Err(ShellError::CantConvert(
"Dataframe not found".into(),
"value is not a dataframe".into(),
span,
)),
}
}
pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {
let s = self.0.column(column).map_err(|_| {
let possibilities = self
.0
.get_column_names()
.iter()
.map(|name| name.to_string())
.collect::<Vec<String>>();
let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string());
ShellError::DidYouMean(option, span)
})?;
let dataframe = DataFrame::new(vec![s.clone()])
.map_err(|e| ShellError::InternalError(e.to_string()))?;
Ok(Self(dataframe))
}
pub fn is_series(&self) -> bool {
self.0.width() == 1
}
pub fn as_series(&self, _span: Span) -> Result<Series, ShellError> {
if !self.is_series() {
return Err(ShellError::InternalError(
"DataFrame cannot be used as Series".into(),
));
}
let series = self
.0
.get_columns()
.get(0)
.expect("We have already checked that the width is 1");
Ok(series.clone())
}
pub fn get_value(&self, row: usize, span: Span) -> Result<Value, ShellError> {
let series = self.as_series(Span::unknown())?;
let column = conversion::create_column(&series, row, row + 1)?;
if column.len() == 0 {
Err(ShellError::AccessBeyondEnd(series.len(), span))
} else {
let value = column
.into_iter()
.next()
.expect("already checked there is a value");
Ok(value)
}
}
// Print is made out a head and if the dataframe is too large, then a tail
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let size: usize = 20;
if df.height() > size {
let sample_size = size / 2;
let mut values = self.head(Some(sample_size))?;
conversion::add_separator(&mut values, df);
let remaining = df.height() - sample_size;
let tail_size = remaining.min(sample_size);
let mut tail_values = self.tail(Some(tail_size))?;
values.append(&mut tail_values);
Ok(values)
} else {
Ok(self.head(Some(size))?)
}
}
pub fn head(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
let to_row = rows.unwrap_or(5);
let values = self.to_rows(0, to_row)?;
Ok(values)
}
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let to_row = df.height();
let size = rows.unwrap_or(5);
let from_row = to_row.saturating_sub(size);
let values = self.to_rows(from_row, to_row)?;
Ok(values)
}
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let upper_row = to_row.min(df.height());
let mut size: usize = 0;
let columns = self
.0
.get_columns()
.iter()
.map(
|col| match conversion::create_column(col, from_row, upper_row) {
Ok(col) => {
size = col.len();
Ok(col)
}
Err(e) => Err(e),
},
)
.collect::<Result<Vec<Column>, ShellError>>()?;
let mut iterators = columns
.into_iter()
.map(|col| (col.name().to_string(), col.into_iter()))
.collect::<Vec<(String, std::vec::IntoIter<Value>)>>();
let values = (0..size)
.into_iter()
.map(|_| {
let mut cols = vec![];
let mut vals = vec![];
for (name, col) in &mut iterators {
cols.push(name.clone());
match col.next() {
Some(v) => vals.push(v),
None => vals.push(Value::Nothing {
span: Span::unknown(),
}),
};
}
Value::Record {
cols,
vals,
span: Span::unknown(),
}
})
.collect::<Vec<Value>>();
Ok(values)
}
// Dataframes are considered equal if they have the same shape, column name and values
pub fn is_equal(&self, other: &Self) -> Option<Ordering> {
if self.as_ref().width() == 0 {
// checking for empty dataframe
return None;
}
if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
// checking both dataframes share the same names
return None;
}
if self.as_ref().height() != other.as_ref().height() {
// checking both dataframes have the same row size
return None;
}
// sorting dataframe by the first column
let column_names = self.as_ref().get_column_names();
let first_col = column_names
.get(0)
.expect("already checked that dataframe is different than 0");
// if unable to sort, then unable to compare
let lhs = match self.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return None,
};
let rhs = match other.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return None,
};
for name in self.as_ref().get_column_names() {
let self_series = lhs.column(name).expect("name from dataframe names");
let other_series = rhs
.column(name)
.expect("already checked that name in other");
let self_series = match self_series.dtype() {
// Casting needed to compare other numeric types with nushell numeric type.
// In nushell we only have i64 integer numeric types and any array created
// with nushell untagged primitives will be of type i64
DataType::UInt32 => match self_series.cast(&DataType::Int64) {
Ok(series) => series,
Err(_) => return None,
},
_ => self_series.clone(),
};
if !self_series.series_equal(other_series) {
return None;
}
}
Some(Ordering::Equal)
}
}
#[cfg(test)]
mod test_dataframe;

View file

@ -66,17 +66,21 @@ pub fn compute_between_series(
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
Err(e) => Err(ShellError::SpannedLabeledError(
"Division error".into(),
e.to_string(),
right.span()?,
)),
}
}
Operator::Equal => {
let mut res = Series::eq(lhs, rhs).into_series();
let mut res = Series::equal(lhs, rhs).into_series();
let name = format!("eq_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Operator::NotEqual => {
let mut res = Series::neq(lhs, rhs).into_series();
let mut res = Series::not_equal(lhs, rhs).into_series();
let name = format!("neq_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
@ -117,8 +121,10 @@ pub fn compute_between_series(
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
_ => Err(ShellError::InternalError(
_ => Err(ShellError::SpannedLabeledError(
"Incompatible types".into(),
"unable to cast to boolean".into(),
right.span()?,
)),
}
}
@ -142,8 +148,10 @@ pub fn compute_between_series(
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
_ => Err(ShellError::InternalError(
_ => Err(ShellError::SpannedLabeledError(
"Incompatible types".into(),
"unable to cast to boolean".into(),
right.span()?,
)),
}
}
@ -254,9 +262,9 @@ pub fn compute_series_single_value(
}),
},
Operator::Equal => match &right {
Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::eq, lhs_span),
Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::equal, lhs_span),
Value::Float { val, .. } => {
compare_series_decimal(&lhs, *val, ChunkedArray::eq, lhs_span)
compare_series_decimal(&lhs, *val, ChunkedArray::equal, lhs_span)
}
_ => Err(ShellError::OperatorMismatch {
op_span: operator.span,
@ -267,9 +275,11 @@ pub fn compute_series_single_value(
}),
},
Operator::NotEqual => match &right {
Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::neq, lhs_span),
Value::Int { val, .. } => {
compare_series_i64(&lhs, *val, ChunkedArray::not_equal, lhs_span)
}
Value::Float { val, .. } => {
compare_series_decimal(&lhs, *val, ChunkedArray::neq, lhs_span)
compare_series_decimal(&lhs, *val, ChunkedArray::not_equal, lhs_span)
}
_ => Err(ShellError::OperatorMismatch {
op_span: operator.span,
@ -364,17 +374,25 @@ where
let casted = series.i64();
compute_casted_i64(casted, val, f, span)
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to i64".into(),
e.to_string(),
span,
)),
}
}
DataType::Int64 => {
let casted = series.i64();
compute_casted_i64(casted, val, f, span)
}
_ => Err(ShellError::InternalError(format!(
"Series of type {} can not be used for operations with an i64 value",
series.dtype()
))),
_ => Err(ShellError::SpannedLabeledError(
"Incorrect type".into(),
format!(
"Series of type {} can not be used for operations with an i64 value",
series.dtype()
),
span,
)),
}
}
@ -393,7 +411,11 @@ where
let res = res.into_series();
NuDataFrame::series_to_value(res, span)
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to i64".into(),
e.to_string(),
span,
)),
}
}
@ -415,17 +437,25 @@ where
let casted = series.f64();
compute_casted_f64(casted, val, f, span)
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to f64".into(),
e.to_string(),
span,
)),
}
}
DataType::Float64 => {
let casted = series.f64();
compute_casted_f64(casted, val, f, span)
}
_ => Err(ShellError::InternalError(format!(
"Series of type {} can not be used for operations with a decimal value",
series.dtype()
))),
_ => Err(ShellError::SpannedLabeledError(
"Incorrect type".into(),
format!(
"Series of type {} can not be used for operations with a decimal value",
series.dtype()
),
span,
)),
}
}
@ -444,7 +474,11 @@ where
let res = res.into_series();
NuDataFrame::series_to_value(res, span)
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to f64".into(),
e.to_string(),
span,
)),
}
}
@ -461,17 +495,25 @@ where
let casted = series.i64();
compare_casted_i64(casted, val, f, span)
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to f64".into(),
e.to_string(),
span,
)),
}
}
DataType::Int64 => {
let casted = series.i64();
compare_casted_i64(casted, val, f, span)
}
_ => Err(ShellError::InternalError(format!(
"Series of type {} can not be used for operations with an i64 value",
series.dtype()
))),
_ => Err(ShellError::SpannedLabeledError(
"Incorrect type".into(),
format!(
"Series of type {} can not be used for operations with an i64 value",
series.dtype()
),
span,
)),
}
}
@ -490,7 +532,11 @@ where
let res = res.into_series();
NuDataFrame::series_to_value(res, span)
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to i64".into(),
e.to_string(),
span,
)),
}
}
@ -512,17 +558,25 @@ where
let casted = series.f64();
compare_casted_f64(casted, val, f, span)
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to i64".into(),
e.to_string(),
span,
)),
}
}
DataType::Float64 => {
let casted = series.f64();
compare_casted_f64(casted, val, f, span)
}
_ => Err(ShellError::InternalError(format!(
"Series of type {} can not be used for operations with a decimal value",
series.dtype()
))),
_ => Err(ShellError::SpannedLabeledError(
"Incorrect type".into(),
format!(
"Series of type {} can not be used for operations with a decimal value",
series.dtype()
),
span,
)),
}
}
@ -541,7 +595,11 @@ where
let res = res.into_series();
NuDataFrame::series_to_value(res, span)
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to f64".into(),
e.to_string(),
span,
)),
}
}
@ -556,9 +614,17 @@ fn contains_series_pat(series: &Series, pat: &str, span: Span) -> Result<Value,
let res = res.into_series();
NuDataFrame::series_to_value(res, span)
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
Err(e) => Err(ShellError::SpannedLabeledError(
"Error using contains".into(),
e.to_string(),
span,
)),
}
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to string".into(),
e.to_string(),
span,
)),
}
}

View file

@ -122,69 +122,69 @@ pub fn create_column(
Ok(Column::new(series.name().into(), values))
}
DataType::UInt8 => {
let casted = series
.u8()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.u8().map_err(|e| {
ShellError::LabeledError("Error casting column to u8".into(), e.to_string())
})?;
Ok(column_from_casted(casted, from_row, size))
}
DataType::UInt16 => {
let casted = series
.u16()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.u16().map_err(|e| {
ShellError::LabeledError("Error casting column to u16".into(), e.to_string())
})?;
Ok(column_from_casted(casted, from_row, size))
}
DataType::UInt32 => {
let casted = series
.u32()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.u32().map_err(|e| {
ShellError::LabeledError("Error casting column to u32".into(), e.to_string())
})?;
Ok(column_from_casted(casted, from_row, size))
}
DataType::UInt64 => {
let casted = series
.u64()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.u64().map_err(|e| {
ShellError::LabeledError("Error casting column to u64".into(), e.to_string())
})?;
Ok(column_from_casted(casted, from_row, size))
}
DataType::Int8 => {
let casted = series
.i8()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.i8().map_err(|e| {
ShellError::LabeledError("Error casting column to i8".into(), e.to_string())
})?;
Ok(column_from_casted(casted, from_row, size))
}
DataType::Int16 => {
let casted = series
.i16()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.i16().map_err(|e| {
ShellError::LabeledError("Error casting column to i16".into(), e.to_string())
})?;
Ok(column_from_casted(casted, from_row, size))
}
DataType::Int32 => {
let casted = series
.i32()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.i32().map_err(|e| {
ShellError::LabeledError("Error casting column to i32".into(), e.to_string())
})?;
Ok(column_from_casted(casted, from_row, size))
}
DataType::Int64 => {
let casted = series
.i64()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.i64().map_err(|e| {
ShellError::LabeledError("Error casting column to i64".into(), e.to_string())
})?;
Ok(column_from_casted(casted, from_row, size))
}
DataType::Float32 => {
let casted = series
.f32()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.f32().map_err(|e| {
ShellError::LabeledError("Error casting column to f32".into(), e.to_string())
})?;
Ok(column_from_casted(casted, from_row, size))
}
DataType::Float64 => {
let casted = series
.f64()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.f64().map_err(|e| {
ShellError::LabeledError("Error casting column to f64".into(), e.to_string())
})?;
Ok(column_from_casted(casted, from_row, size))
}
DataType::Boolean => {
let casted = series
.bool()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.bool().map_err(|e| {
ShellError::LabeledError("Error casting column to bool".into(), e.to_string())
})?;
let values = casted
.into_iter()
@ -204,9 +204,9 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values))
}
DataType::Utf8 => {
let casted = series
.utf8()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.utf8().map_err(|e| {
ShellError::LabeledError("Error casting column to string".into(), e.to_string())
})?;
let values = casted
.into_iter()
@ -231,10 +231,10 @@ pub fn create_column(
.downcast_ref::<ChunkedArray<ObjectType<DataFrameValue>>>();
match casted {
None => Err(ShellError::InternalError(format!(
"Object not supported for conversion: {}",
x
))),
None => Err(ShellError::LabeledError(
"Error casting object from series".into(),
format!("Object not supported for conversion: {}", x),
)),
Some(ca) => {
let values = ca
.into_iter()
@ -253,9 +253,9 @@ pub fn create_column(
}
}
DataType::Date => {
let casted = series
.date()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.date().map_err(|e| {
ShellError::LabeledError("Error casting column to date".into(), e.to_string())
})?;
let values = casted
.into_iter()
@ -285,9 +285,9 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values))
}
DataType::Datetime => {
let casted = series
.datetime()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.datetime().map_err(|e| {
ShellError::LabeledError("Error casting column to datetime".into(), e.to_string())
})?;
let values = casted
.into_iter()
@ -317,9 +317,9 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values))
}
DataType::Time => {
let casted = series
.time()
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let casted = series.time().map_err(|e| {
ShellError::LabeledError("Error casting column to time".into(), e.to_string())
})?;
let values = casted
.into_iter()
@ -338,10 +338,10 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values))
}
e => Err(ShellError::InternalError(format!(
"Value not supported in nushell: {}",
e
))),
e => Err(ShellError::LabeledError(
"Error creating Dataframe".into(),
format!("Value not supported in nushell: {}", e),
)),
}
}
@ -530,8 +530,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, Shel
}
}
match DataFrame::new(df_series) {
Ok(df) => Ok(NuDataFrame::new(df)),
Err(e) => Err(ShellError::InternalError(e.to_string())),
}
DataFrame::new(df_series)
.map(|df| NuDataFrame::new(df))
.map_err(|e| ShellError::LabeledError("Error creating dataframe".into(), e.to_string()))
}

View file

@ -0,0 +1,395 @@
mod between_values;
mod conversion;
mod custom_value;
mod operations;
pub(super) use conversion::{Column, ColumnMap};
use indexmap::map::IndexMap;
use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value};
use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
// DataFrameValue is an encapsulation of Nushell Value that can be used
// to define the PolarsObject Trait. The polars object trait allows to
// create dataframes with mixed datatypes
#[derive(Clone, Debug)]
pub struct DataFrameValue(Value);
impl DataFrameValue {
fn new(value: Value) -> Self {
Self(value)
}
fn get_value(&self) -> Value {
self.0.clone()
}
}
impl Display for DataFrameValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0.get_type())
}
}
impl Default for DataFrameValue {
fn default() -> Self {
Self(Value::Nothing {
span: Span::unknown(),
})
}
}
impl PartialEq for DataFrameValue {
fn eq(&self, other: &Self) -> bool {
self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq)
}
}
impl Eq for DataFrameValue {}
impl std::hash::Hash for DataFrameValue {
fn hash<H: Hasher>(&self, state: &mut H) {
match &self.0 {
Value::Nothing { .. } => 0.hash(state),
Value::Int { val, .. } => val.hash(state),
Value::String { val, .. } => val.hash(state),
// TODO. Define hash for the rest of types
_ => {}
}
}
}
impl PolarsObject for DataFrameValue {
fn type_name() -> &'static str {
"object"
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct NuDataFrame(DataFrame);
impl AsRef<DataFrame> for NuDataFrame {
fn as_ref(&self) -> &polars::prelude::DataFrame {
&self.0
}
}
impl AsMut<DataFrame> for NuDataFrame {
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
&mut self.0
}
}
impl NuDataFrame {
pub fn new(dataframe: DataFrame) -> Self {
Self(dataframe)
}
fn default_value(span: Span) -> Value {
let dataframe = DataFrame::default();
NuDataFrame::dataframe_into_value(dataframe, span)
}
pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value {
Value::CustomValue {
val: Box::new(Self::new(dataframe)),
span,
}
}
pub fn into_value(self, span: Span) -> Value {
Value::CustomValue {
val: Box::new(self),
span,
}
}
pub fn series_to_value(series: Series, span: Span) -> Result<Value, ShellError> {
match DataFrame::new(vec![series]) {
Ok(dataframe) => Ok(NuDataFrame::dataframe_into_value(dataframe, span)),
Err(e) => Err(ShellError::SpannedLabeledError(
"Error creating dataframe".into(),
e.to_string(),
span,
)),
}
}
pub fn try_from_iter<T>(iter: T) -> Result<Self, ShellError>
where
T: Iterator<Item = Value>,
{
// Dictionary to store the columnar data extracted from
// the input. During the iteration we check if the values
// have different type
let mut column_values: ColumnMap = IndexMap::new();
for value in iter {
match value {
Value::List { vals, .. } => {
let cols = (0..vals.len())
.map(|i| format!("{}", i))
.collect::<Vec<String>>();
conversion::insert_record(&mut column_values, &cols, &vals)?
}
Value::Record { cols, vals, .. } => {
conversion::insert_record(&mut column_values, &cols, &vals)?
}
_ => {
let key = "0".to_string();
conversion::insert_value(value, key, &mut column_values)?
}
}
}
conversion::from_parsed_columns(column_values)
}
//pub fn try_from_series(columns: Vec<Series>) -> Result<Self, ShellError> {
// let dataframe = DataFrame::new(columns)
// .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?;
// Ok(Self::new(dataframe))
//}
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
let mut column_values: ColumnMap = IndexMap::new();
for column in columns {
let name = column.name().to_string();
for value in column {
conversion::insert_value(value, name.clone(), &mut column_values)?;
}
}
conversion::from_parsed_columns(column_values)
}
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
match input.into_value(span) {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuDataFrame>() {
Some(df) => Ok(NuDataFrame(df.0.clone())),
None => Err(ShellError::CantConvert(
"Dataframe not found".into(),
"value is not a dataframe".into(),
span,
)),
},
_ => Err(ShellError::CantConvert(
"Dataframe not found".into(),
"value is not a dataframe".into(),
span,
)),
}
}
pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {
let s = self.0.column(column).map_err(|_| {
let possibilities = self
.0
.get_column_names()
.iter()
.map(|name| name.to_string())
.collect::<Vec<String>>();
let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string());
ShellError::DidYouMean(option, span)
})?;
let dataframe = DataFrame::new(vec![s.clone()]).map_err(|e| {
ShellError::SpannedLabeledError("Error creating dataframe".into(), e.to_string(), span)
})?;
Ok(Self(dataframe))
}
pub fn is_series(&self) -> bool {
self.0.width() == 1
}
pub fn as_series(&self, span: Span) -> Result<Series, ShellError> {
if !self.is_series() {
return Err(ShellError::SpannedLabeledError(
"Error using as series".into(),
"dataframe has more than one column".into(),
span,
));
}
let series = self
.0
.get_columns()
.get(0)
.expect("We have already checked that the width is 1");
Ok(series.clone())
}
pub fn get_value(&self, row: usize, span: Span) -> Result<Value, ShellError> {
let series = self.as_series(Span::unknown())?;
let column = conversion::create_column(&series, row, row + 1)?;
if column.len() == 0 {
Err(ShellError::AccessBeyondEnd(series.len(), span))
} else {
let value = column
.into_iter()
.next()
.expect("already checked there is a value");
Ok(value)
}
}
// Print is made out a head and if the dataframe is too large, then a tail
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let size: usize = 20;
if df.height() > size {
let sample_size = size / 2;
let mut values = self.head(Some(sample_size))?;
conversion::add_separator(&mut values, df);
let remaining = df.height() - sample_size;
let tail_size = remaining.min(sample_size);
let mut tail_values = self.tail(Some(tail_size))?;
values.append(&mut tail_values);
Ok(values)
} else {
Ok(self.head(Some(size))?)
}
}
pub fn head(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
let to_row = rows.unwrap_or(5);
let values = self.to_rows(0, to_row)?;
Ok(values)
}
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let to_row = df.height();
let size = rows.unwrap_or(5);
let from_row = to_row.saturating_sub(size);
let values = self.to_rows(from_row, to_row)?;
Ok(values)
}
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let upper_row = to_row.min(df.height());
let mut size: usize = 0;
let columns = self
.0
.get_columns()
.iter()
.map(
|col| match conversion::create_column(col, from_row, upper_row) {
Ok(col) => {
size = col.len();
Ok(col)
}
Err(e) => Err(e),
},
)
.collect::<Result<Vec<Column>, ShellError>>()?;
let mut iterators = columns
.into_iter()
.map(|col| (col.name().to_string(), col.into_iter()))
.collect::<Vec<(String, std::vec::IntoIter<Value>)>>();
let values = (0..size)
.into_iter()
.map(|_| {
let mut cols = vec![];
let mut vals = vec![];
for (name, col) in &mut iterators {
cols.push(name.clone());
match col.next() {
Some(v) => vals.push(v),
None => vals.push(Value::Nothing {
span: Span::unknown(),
}),
};
}
Value::Record {
cols,
vals,
span: Span::unknown(),
}
})
.collect::<Vec<Value>>();
Ok(values)
}
// Dataframes are considered equal if they have the same shape, column name and values
pub fn is_equal(&self, other: &Self) -> Option<Ordering> {
if self.as_ref().width() == 0 {
// checking for empty dataframe
return None;
}
if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
// checking both dataframes share the same names
return None;
}
if self.as_ref().height() != other.as_ref().height() {
// checking both dataframes have the same row size
return None;
}
// sorting dataframe by the first column
let column_names = self.as_ref().get_column_names();
let first_col = column_names
.get(0)
.expect("already checked that dataframe is different than 0");
// if unable to sort, then unable to compare
let lhs = match self.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return None,
};
let rhs = match other.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return None,
};
for name in self.as_ref().get_column_names() {
let self_series = lhs.column(name).expect("name from dataframe names");
let other_series = rhs
.column(name)
.expect("already checked that name in other");
let self_series = match self_series.dtype() {
// Casting needed to compare other numeric types with nushell numeric type.
// In nushell we only have i64 integer numeric types and any array created
// with nushell untagged primitives will be of type i64
DataType::UInt32 => match self_series.cast(&DataType::Int64) {
Ok(series) => series,
Err(_) => return None,
},
_ => self_series.clone(),
};
if !self_series.series_equal(other_series) {
return None;
}
}
Some(Ordering::Equal)
}
}

View file

@ -122,7 +122,7 @@ impl NuDataFrame {
&self,
other: &NuDataFrame,
axis: Axis,
_span: Span,
span: Span,
) -> Result<Self, ShellError> {
match axis {
Axis::Row => {
@ -147,8 +147,13 @@ impl NuDataFrame {
})
.collect::<Vec<Series>>();
let df_new = DataFrame::new(new_cols)
.map_err(|e| ShellError::InternalError(e.to_string()))?;
let df_new = DataFrame::new(new_cols).map_err(|e| {
ShellError::SpannedLabeledError(
"Error creating dataframe".into(),
e.to_string(),
span,
)
})?;
Ok(NuDataFrame::new(df_new))
} //Axis::Column => {

View file

@ -1,4 +1,4 @@
use super::super::NuDataFrame;
use super::nu_dataframe::NuDataFrame;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
@ -22,7 +22,7 @@ impl Command for OpenDataFrame {
}
fn signature(&self) -> Signature {
Signature::build(self.name().to_string())
Signature::build(self.name())
.required(
"file",
SyntaxShape::Filepath,
@ -54,7 +54,7 @@ impl Command for OpenDataFrame {
.named(
"columns",
SyntaxShape::List(Box::new(SyntaxShape::String)),
"Columns to be selected from csv file. CSV file",
"Columns to be selected from csv file. CSV and Parquet file",
None,
)
.category(Category::Custom("dataframe".into()))
@ -87,7 +87,7 @@ fn command(
let span = call.head;
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
let df = match file.item.extension() {
match file.item.extension() {
Some(e) => match e.to_str() {
Some("csv") => from_csv(engine_state, stack, call),
Some("parquet") => from_parquet(engine_state, stack, call),
@ -101,11 +101,8 @@ fn command(
"File without extension".into(),
file.span,
)),
}?;
Ok(PipelineData::Value(NuDataFrame::dataframe_into_value(
df, span,
)))
}
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, span), None))
}
fn from_parquet(
@ -114,12 +111,25 @@ fn from_parquet(
call: &Call,
) -> Result<polars::prelude::DataFrame, ShellError> {
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?;
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
let r = File::open(&file.item).map_err(|e| {
ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file.span)
})?;
let reader = ParquetReader::new(r);
reader
.finish()
.map_err(|e| ShellError::InternalError(format!("{:?}", e)))
let reader = match columns {
None => reader,
Some(columns) => reader.with_columns(Some(columns)),
};
reader.finish().map_err(|e| {
ShellError::SpannedLabeledError(
"Parquet reader error".into(),
format!("{:?}", e),
call.head,
)
})
}
fn from_json(
@ -129,13 +139,15 @@ fn from_json(
) -> Result<polars::prelude::DataFrame, ShellError> {
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?;
let r = File::open(&file.item).map_err(|e| {
ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file.span)
})?;
let reader = JsonReader::new(r);
reader
.finish()
.map_err(|e| ShellError::InternalError(e.to_string()))
reader.finish().map_err(|e| {
ShellError::SpannedLabeledError("Json reader error".into(), format!("{:?}", e), call.head)
})
}
fn from_csv(
@ -151,15 +163,23 @@ fn from_csv(
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
let csv_reader = CsvReader::from_path(&file.item)
.map_err(|e| ShellError::InternalError(e.to_string()))?
.map_err(|e| {
ShellError::SpannedLabeledError(
"Error creating CSV reader".into(),
e.to_string(),
file.span,
)
})?
.with_encoding(CsvEncoding::LossyUtf8);
let csv_reader = match delimiter {
None => csv_reader,
Some(d) => {
if d.item.len() != 1 {
return Err(ShellError::InternalError(
"Delimiter has to be one char".into(),
return Err(ShellError::SpannedLabeledError(
"Incorrect delimiter".into(),
"Delimiter has to be one character".into(),
d.span,
));
} else {
let delimiter = match d.item.chars().next() {
@ -188,7 +208,11 @@ fn from_csv(
Some(columns) => csv_reader.with_columns(Some(columns)),
};
csv_reader
.finish()
.map_err(|e| ShellError::InternalError(e.to_string()))
csv_reader.finish().map_err(|e| {
ShellError::SpannedLabeledError(
"Parquet reader error".into(),
format!("{:?}", e),
call.head,
)
})
}

View file

@ -1,4 +1,4 @@
use super::super::{Column, NuDataFrame};
use super::nu_dataframe::{Column, NuDataFrame};
use nu_protocol::{
ast::Call,
@ -19,7 +19,7 @@ impl Command for ToDataFrame {
}
fn signature(&self) -> Signature {
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
@ -94,8 +94,8 @@ impl Command for ToDataFrame {
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_iter(input.into_iter())?;
Ok(PipelineData::Value(NuDataFrame::into_value(df, call.head)))
NuDataFrame::try_from_iter(input.into_iter())
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
}
}

View file

@ -382,4 +382,25 @@ mod tests {
PluginResponse::Value(_) => panic!("returned wrong call type"),
}
}
#[test]
fn response_round_trip_error_none() {
let error = LabeledError {
label: "label".into(),
msg: "msg".into(),
span: None,
};
let response = PluginResponse::Error(error.clone());
let mut buffer: Vec<u8> = Vec::new();
encode_response(&response, &mut buffer).expect("unable to serialize message");
let returned =
decode_response(&mut buffer.as_slice()).expect("unable to deserialize message");
match returned {
PluginResponse::Error(msg) => assert_eq!(error, msg),
PluginResponse::Signature(_) => panic!("returned wrong call type"),
PluginResponse::Value(_) => panic!("returned wrong call type"),
}
}
}