nushell/crates/nu_plugin_to_bson/src/to_bson.rs

286 lines
11 KiB
Rust
Raw Normal View History

2019-08-26 14:16:34 +00:00
use bson::{encode_document, oid::ObjectId, spec::BinarySubtype, Bson, Document};
use nu_errors::{CoerceInto, ShellError};
use nu_protocol::{
Dictionary, Primitive, ReturnSuccess, ReturnValue, SpannedTypeName, UnspannedPathMember,
UntaggedValue, Value,
};
use nu_source::{Tag, TaggedItem};
use num_traits::ToPrimitive;
2019-08-26 14:16:34 +00:00
use std::convert::TryInto;
#[derive(Default)]
pub struct ToBson {
pub state: Vec<Value>,
}
2019-09-04 01:50:23 +00:00
impl ToBson {
pub fn new() -> ToBson {
ToBson { state: vec![] }
2019-09-04 01:50:23 +00:00
}
2019-08-26 14:16:34 +00:00
}
pub fn value_to_bson_value(v: &Value) -> Result<Bson, ShellError> {
Ok(match &v.value {
UntaggedValue::Primitive(Primitive::Boolean(b)) => Bson::Boolean(*b),
// FIXME: What about really big decimals?
2020-07-11 02:17:37 +00:00
UntaggedValue::Primitive(Primitive::Filesize(decimal)) => Bson::FloatingPoint(
(decimal)
.to_f64()
.expect("Unimplemented BUG: What about big decimals?"),
),
UntaggedValue::Primitive(Primitive::Duration(i)) => Bson::String(i.to_string()),
UntaggedValue::Primitive(Primitive::Date(d)) => Bson::UtcDatetime((*d).into()),
UntaggedValue::Primitive(Primitive::EndOfStream) => Bson::Null,
UntaggedValue::Primitive(Primitive::BeginningOfStream) => Bson::Null,
UntaggedValue::Primitive(Primitive::Decimal(d)) => {
Bson::FloatingPoint(d.to_f64().ok_or_else(|| {
ShellError::labeled_error(
"Could not convert value to decimal",
"could not convert to decimal",
&v.tag,
)
})?)
}
UntaggedValue::Primitive(Primitive::Int(i)) => Bson::I64(*i),
UntaggedValue::Primitive(Primitive::BigInt(i)) => {
Bson::I64(i.tagged(&v.tag).coerce_into("converting to BSON")?)
}
UntaggedValue::Primitive(Primitive::Nothing) => Bson::Null,
UntaggedValue::Primitive(Primitive::String(s)) => Bson::String(s.clone()),
UntaggedValue::Primitive(Primitive::ColumnPath(path)) => Bson::Array(
2019-11-04 15:47:03 +00:00
path.iter()
.map(|x| match &x.unspanned {
UnspannedPathMember::String(string) => Ok(Bson::String(string.clone())),
UnspannedPathMember::Int(int) => Ok(Bson::I64(*int)),
2019-11-04 15:47:03 +00:00
})
.collect::<Result<Vec<Bson>, ShellError>>()?,
),
UntaggedValue::Primitive(Primitive::GlobPattern(p)) => Bson::String(p.clone()),
UntaggedValue::Primitive(Primitive::FilePath(s)) => Bson::String(s.display().to_string()),
UntaggedValue::Table(l) => Bson::Array(
l.iter()
.map(|x| value_to_bson_value(x))
.collect::<Result<_, _>>()?,
),
Add Range and start Signature support This commit contains two improvements: - Support for a Range syntax (and a corresponding Range value) - Work towards a signature syntax Implementing the Range syntax resulted in cleaning up how operators in the core syntax works. There are now two kinds of infix operators - tight operators (`.` and `..`) - loose operators Tight operators may not be interspersed (`$it.left..$it.right` is a syntax error). Loose operators require whitespace on both sides of the operator, and can be arbitrarily interspersed. Precedence is left to right in the core syntax. Note that delimited syntax (like `( ... )` or `[ ... ]`) is a single token node in the core syntax. A single token node can be parsed from beginning to end in a context-free manner. The rule for `.` is `<token node>.<member>`. The rule for `..` is `<token node>..<token node>`. Loose operators all have the same syntactic rule: `<token node><space><loose op><space><token node>`. The second aspect of this pull request is the beginning of support for a signature syntax. Before implementing signatures, a necessary prerequisite is for the core syntax to support multi-line programs. That work establishes a few things: - `;` and newlines are handled in the core grammar, and both count as "separators" - line comments begin with `#` and continue until the end of the line In this commit, multi-token productions in the core grammar can use separators interchangably with spaces. However, I think we will ultimately want a different rule preventing separators from occurring before an infix operator, so that the end of a line is always unambiguous. This would avoid gratuitous differences between modules and repl usage. We already effectively have this rule, because otherwise `x<newline> | y` would be a single pipeline, but of course that wouldn't work.
2019-12-04 21:14:52 +00:00
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => Bson::Null,
2021-05-13 09:49:46 +00:00
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => Bson::Null,
UntaggedValue::Error(e) => return Err(e.clone()),
UntaggedValue::Primitive(Primitive::Binary(b)) => {
Bson::Binary(BinarySubtype::Generic, b.clone())
}
UntaggedValue::Row(o) => object_value_to_bson(o)?,
})
2019-08-26 14:16:34 +00:00
}
// object_value_to_bson handles all Objects, even those that correspond to special
// types (things like regex or javascript code).
fn object_value_to_bson(o: &Dictionary) -> Result<Bson, ShellError> {
2019-08-26 14:16:34 +00:00
let mut it = o.entries.iter();
if it.len() > 2 {
return generic_object_value_to_bson(o);
}
match it.next() {
Some((regex, tagged_regex_value)) if regex == "$regex" => match it.next() {
Some((options, tagged_opts_value)) if options == "$options" => {
let r: Result<String, _> = tagged_regex_value.try_into();
let opts: Result<String, _> = tagged_opts_value.try_into();
match (r, opts) {
(Ok(r), Ok(opts)) => Ok(Bson::RegExp(r, opts)),
_ => generic_object_value_to_bson(o),
2019-08-26 14:16:34 +00:00
}
}
_ => generic_object_value_to_bson(o),
},
Some((javascript, tagged_javascript_value)) if javascript == "$javascript" => {
match it.next() {
Some((scope, tagged_scope_value)) if scope == "$scope" => {
let js: Result<String, _> = tagged_javascript_value.try_into();
let s: Result<&Dictionary, _> = tagged_scope_value.try_into();
match (js, s) {
(Ok(js), Ok(s)) => {
if let Bson::Document(doc) = object_value_to_bson(s)? {
Ok(Bson::JavaScriptCodeWithScope(js, doc))
} else {
generic_object_value_to_bson(o)
}
2019-08-26 14:16:34 +00:00
}
_ => generic_object_value_to_bson(o),
2019-08-26 14:16:34 +00:00
}
}
None => {
let js: Result<String, _> = tagged_javascript_value.try_into();
match js {
Err(_) => generic_object_value_to_bson(o),
Ok(v) => Ok(Bson::JavaScriptCode(v)),
2019-08-26 14:16:34 +00:00
}
}
_ => generic_object_value_to_bson(o),
}
}
Some((timestamp, tagged_timestamp_value)) if timestamp == "$timestamp" => {
let ts: Result<i64, _> = tagged_timestamp_value.try_into();
if let Ok(ts) = ts {
Ok(Bson::TimeStamp(ts))
2019-08-26 14:16:34 +00:00
} else {
generic_object_value_to_bson(o)
2019-08-26 14:16:34 +00:00
}
}
Some((binary_subtype, tagged_binary_subtype_value))
if binary_subtype == "$binary_subtype" =>
{
match it.next() {
Some((binary, tagged_bin_value)) if binary == "$binary" => {
let bst = get_binary_subtype(tagged_binary_subtype_value);
let bin: Result<Vec<u8>, _> = tagged_bin_value.try_into();
match (bin, bst) {
(Ok(bin), Ok(v)) => Ok(Bson::Binary(v, bin)),
_ => generic_object_value_to_bson(o),
2019-08-26 14:16:34 +00:00
}
}
_ => generic_object_value_to_bson(o),
}
}
Some((object_id, tagged_object_id_value)) if object_id == "$object_id" => {
let obj_id: Result<String, _> = tagged_object_id_value.try_into();
if let Ok(obj_id) = obj_id {
let obj_id = ObjectId::with_string(&obj_id);
if let Ok(obj_id) = obj_id {
Ok(Bson::ObjectId(obj_id))
2019-08-26 14:16:34 +00:00
} else {
generic_object_value_to_bson(o)
2019-08-26 14:16:34 +00:00
}
} else {
generic_object_value_to_bson(o)
2019-08-26 14:16:34 +00:00
}
}
Some((symbol, tagged_symbol_value)) if symbol == "$symbol" => {
let sym: Result<String, _> = tagged_symbol_value.try_into();
if let Ok(sym) = sym {
Ok(Bson::Symbol(sym))
2019-08-26 14:16:34 +00:00
} else {
generic_object_value_to_bson(o)
2019-08-26 14:16:34 +00:00
}
}
_ => generic_object_value_to_bson(o),
}
}
fn get_binary_subtype(tagged_value: &Value) -> Result<BinarySubtype, ShellError> {
match &tagged_value.value {
UntaggedValue::Primitive(Primitive::String(s)) => Ok(match s.as_ref() {
2019-08-26 14:16:34 +00:00
"generic" => BinarySubtype::Generic,
"function" => BinarySubtype::Function,
"binary_old" => BinarySubtype::BinaryOld,
"uuid_old" => BinarySubtype::UuidOld,
"uuid" => BinarySubtype::Uuid,
"md5" => BinarySubtype::Md5,
_ => unreachable!(),
}),
UntaggedValue::Primitive(Primitive::BigInt(i)) => Ok(BinarySubtype::UserDefined(
i.tagged(&tagged_value.tag)
.coerce_into("converting to BSON binary subtype")?,
)),
_ => Err(ShellError::type_error(
"bson binary",
tagged_value.spanned_type_name(),
)),
2019-08-26 14:16:34 +00:00
}
}
// generic_object_value_bson handles any Object that does not
// correspond to a special bson type (things like regex or javascript code).
fn generic_object_value_to_bson(o: &Dictionary) -> Result<Bson, ShellError> {
2019-08-26 14:16:34 +00:00
let mut doc = Document::new();
for (k, v) in o.entries.iter() {
doc.insert(k.clone(), value_to_bson_value(v)?);
2019-08-26 14:16:34 +00:00
}
Ok(Bson::Document(doc))
2019-08-26 14:16:34 +00:00
}
fn shell_encode_document(writer: &mut Vec<u8>, doc: Document, tag: Tag) -> Result<(), ShellError> {
2019-08-26 14:16:34 +00:00
match encode_document(writer, &doc) {
Err(e) => Err(ShellError::labeled_error(
format!("Failed to encode document due to: {:?}", e),
"requires BSON-compatible document",
tag,
2019-08-26 14:16:34 +00:00
)),
_ => Ok(()),
}
}
fn bson_value_to_bytes(bson: Bson, tag: Tag) -> Result<Vec<u8>, ShellError> {
2019-08-26 14:16:34 +00:00
let mut out = Vec::new();
match bson {
Bson::Array(a) => {
for v in a.into_iter() {
match v {
Bson::Document(d) => shell_encode_document(&mut out, d, tag.clone())?,
2019-08-26 14:16:34 +00:00
_ => {
return Err(ShellError::labeled_error(
format!("All top level values must be Documents, got {:?}", v),
"requires BSON-compatible document",
&tag,
2019-08-26 14:16:34 +00:00
))
}
}
}
}
Bson::Document(d) => shell_encode_document(&mut out, d, tag)?,
2019-08-26 14:16:34 +00:00
_ => {
return Err(ShellError::labeled_error(
format!("All top level values must be Documents, got {:?}", bson),
"requires BSON-compatible document",
tag,
2019-08-26 14:16:34 +00:00
))
}
}
Ok(out)
}
pub fn to_bson(input: Vec<Value>, name_tag: Tag) -> Vec<ReturnValue> {
let name_span = name_tag.span;
let to_process_input = match input.len() {
x if x > 1 => {
let tag = input[0].tag.clone();
vec![Value {
value: UntaggedValue::Table(input),
tag,
}]
}
1 => input,
_ => vec![],
};
to_process_input
.into_iter()
.map(move |value| match value_to_bson_value(&value) {
Ok(bson_value) => {
let value_span = value.tag.span;
match bson_value_to_bytes(bson_value, name_tag.clone()) {
Ok(x) => ReturnSuccess::value(UntaggedValue::binary(x).into_value(name_span)),
_ => Err(ShellError::labeled_error_with_secondary(
"Expected a table with BSON-compatible structure from pipeline",
"requires BSON-compatible input",
name_span,
"originates from here".to_string(),
value_span,
)),
}
}
_ => Err(ShellError::labeled_error(
"Expected a table with BSON-compatible structure from pipeline",
"requires BSON-compatible input",
&name_tag,
)),
})
.collect()
}