nushell/crates/nu-command/tests/commands/database/into_sqlite.rs
Stefan Holderbach 406df7f208
Avoid taking unnecessary ownership of intermediates (#12740)
# Description

Judiciously try to avoid allocations/clone by changing the signature of
functions

- **Don't pass str by value unnecessarily if only read**
- **Don't require a vec in `Sandbox::with_files`**
- **Remove unnecessary string clone**
- **Fixup unnecessary borrow**
- **Use `&str` in shape color instead**
- **Vec -> Slice**
- **Elide string clone**
- **Elide `Path` clone**
- **Take &str to elide clone in tests**

# User-Facing Changes
None

# Tests + Formatting
This touches many tests purely in changing from owned to borrowed/static
data
2024-05-04 00:53:15 +00:00

485 lines
16 KiB
Rust

use std::{io::Write, path::PathBuf};
use chrono::{DateTime, FixedOffset};
use nu_protocol::{ast::PathMember, record, Span, Value};
use nu_test_support::{
fs::{line_ending, Stub},
nu, pipeline,
playground::{Dirs, Playground},
};
use rand::{
distributions::{Alphanumeric, DistString, Standard},
prelude::Distribution,
rngs::StdRng,
Rng, SeedableRng,
};
#[test]
fn into_sqlite_schema() {
Playground::setup("schema", |dirs, _| {
let testdb = make_sqlite_db(
&dirs,
r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary];
[true, 1, 2.0, 1kb, 1sec, "2023-09-10 11:30:00", "foo", ("binary" | into binary)],
[false, 2, 3.0, 2mb, 4wk, "2020-09-10 12:30:00", "bar", ("wut" | into binary)],
]"#,
);
let conn = rusqlite::Connection::open(testdb).unwrap();
let mut stmt = conn.prepare("SELECT * FROM pragma_table_info(?1)").unwrap();
let actual_rows: Vec<_> = stmt
.query_and_then(["main"], |row| -> rusqlite::Result<_, rusqlite::Error> {
let name: String = row.get("name").unwrap();
let col_type: String = row.get("type").unwrap();
Ok((name, col_type))
})
.unwrap()
.map(|row| row.unwrap())
.collect();
let expected_rows = vec![
("somebool".into(), "BOOLEAN".into()),
("someint".into(), "INTEGER".into()),
("somefloat".into(), "REAL".into()),
("somefilesize".into(), "INTEGER".into()),
("someduration".into(), "BIGINT".into()),
("somedate".into(), "TEXT".into()),
("somestring".into(), "TEXT".into()),
("somebinary".into(), "BLOB".into()),
];
assert_eq!(expected_rows, actual_rows);
});
}
#[test]
fn into_sqlite_values() {
Playground::setup("values", |dirs, _| {
insert_test_rows(
&dirs,
r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1],
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
]"#,
None,
vec![
TestRow(
true,
1,
2.0,
1000,
1000000000,
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(),
b"binary".to_vec(),
rusqlite::types::Value::Integer(1),
),
TestRow(
false,
2,
3.0,
2000000,
2419200000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"bar".into(),
b"wut".to_vec(),
rusqlite::types::Value::Null,
),
],
);
});
}
/// When we create a new table, we use the first row to infer the schema of the
/// table. In the event that a column is null, we can't know what type the row
/// should be, so we just assume TEXT.
#[test]
fn into_sqlite_values_first_column_null() {
Playground::setup("values", |dirs, _| {
insert_test_rows(
&dirs,
r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1],
]"#,
None,
vec![
TestRow(
false,
2,
3.0,
2000000,
2419200000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"bar".into(),
b"wut".to_vec(),
rusqlite::types::Value::Null,
),
TestRow(
true,
1,
2.0,
1000,
1000000000,
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(),
b"binary".to_vec(),
rusqlite::types::Value::Text("1".into()),
),
],
);
});
}
/// If the DB / table already exist, then the insert should end up with the
/// right data types no matter if the first row is null or not.
#[test]
fn into_sqlite_values_first_column_null_preexisting_db() {
Playground::setup("values", |dirs, _| {
insert_test_rows(
&dirs,
r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1],
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
]"#,
None,
vec![
TestRow(
true,
1,
2.0,
1000,
1000000000,
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(),
b"binary".to_vec(),
rusqlite::types::Value::Integer(1),
),
TestRow(
false,
2,
3.0,
2000000,
2419200000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"bar".into(),
b"wut".to_vec(),
rusqlite::types::Value::Null,
),
],
);
insert_test_rows(
&dirs,
r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[true, 3, 5.0, 3.1mb, 1wk, "2020-09-10T12:30:00-00:00", "baz", ("huh" | into binary), null],
[true, 3, 5.0, 3.1mb, 1wk, "2020-09-10T12:30:00-00:00", "baz", ("huh" | into binary), 3],
]"#,
None,
vec![
TestRow(
true,
1,
2.0,
1000,
1000000000,
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(),
b"binary".to_vec(),
rusqlite::types::Value::Integer(1),
),
TestRow(
false,
2,
3.0,
2000000,
2419200000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"bar".into(),
b"wut".to_vec(),
rusqlite::types::Value::Null,
),
TestRow(
true,
3,
5.0,
3100000,
604800000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"baz".into(),
b"huh".to_vec(),
rusqlite::types::Value::Null,
),
TestRow(
true,
3,
5.0,
3100000,
604800000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"baz".into(),
b"huh".to_vec(),
rusqlite::types::Value::Integer(3),
),
],
);
});
}
/// Opening a preexisting database should append to it
#[test]
fn into_sqlite_existing_db_append() {
Playground::setup("existing_db_append", |dirs, _| {
// create a new DB with only one row
insert_test_rows(
&dirs,
r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), null],
]"#,
None,
vec![TestRow(
true,
1,
2.0,
1000,
1000000000,
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(),
b"binary".to_vec(),
rusqlite::types::Value::Null,
)],
);
// open the same DB again and write one row
insert_test_rows(
&dirs,
r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
]"#,
None,
// it should have both rows
vec![
TestRow(
true,
1,
2.0,
1000,
1000000000,
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(),
b"binary".to_vec(),
rusqlite::types::Value::Null,
),
TestRow(
false,
2,
3.0,
2000000,
2419200000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"bar".into(),
b"wut".to_vec(),
rusqlite::types::Value::Null,
),
],
);
});
}
/// Test inserting a good number of randomly generated rows to test an actual
/// streaming pipeline instead of a simple value
#[test]
fn into_sqlite_big_insert() {
Playground::setup("big_insert", |dirs, playground| {
const NUM_ROWS: usize = 10_000;
const NUON_FILE_NAME: &str = "data.nuon";
let nuon_path = dirs.test().join(NUON_FILE_NAME);
playground.with_files(&[Stub::EmptyFile(&nuon_path.to_string_lossy())]);
let mut expected_rows = Vec::new();
let mut nuon_file = std::fs::OpenOptions::new()
.write(true)
.open(&nuon_path)
.unwrap();
// write the header
for row in std::iter::repeat_with(TestRow::random).take(NUM_ROWS) {
let mut value: Value = row.clone().into();
// HACK: Convert to a string to get around this: https://github.com/nushell/nushell/issues/9186
value
.upsert_cell_path(
&[PathMember::String {
val: "somedate".into(),
span: Span::unknown(),
optional: false,
}],
Box::new(|dateval| {
Value::string(dateval.coerce_string().unwrap(), dateval.span())
}),
)
.unwrap();
let nuon = nuon::to_nuon(&value, nuon::ToStyle::Raw, Some(Span::unknown())).unwrap()
+ &line_ending();
nuon_file.write_all(nuon.as_bytes()).unwrap();
expected_rows.push(row);
}
insert_test_rows(
&dirs,
&format!(
"open --raw {} | lines | each {{ from nuon }}",
nuon_path.to_string_lossy()
),
None,
expected_rows,
);
});
}
/// empty in, empty out
#[test]
fn into_sqlite_empty() {
Playground::setup("empty", |dirs, _| {
insert_test_rows(&dirs, r#"[]"#, Some("SELECT * FROM sqlite_schema;"), vec![]);
});
}
#[derive(Debug, PartialEq, Clone)]
struct TestRow(
bool,
i64,
f64,
i64,
i64,
chrono::DateTime<chrono::FixedOffset>,
std::string::String,
std::vec::Vec<u8>,
rusqlite::types::Value,
);
impl TestRow {
pub fn random() -> Self {
StdRng::from_entropy().sample(Standard)
}
}
impl From<TestRow> for Value {
fn from(row: TestRow) -> Self {
Value::record(
record! {
"somebool" => Value::bool(row.0, Span::unknown()),
"someint" => Value::int(row.1, Span::unknown()),
"somefloat" => Value::float(row.2, Span::unknown()),
"somefilesize" => Value::filesize(row.3, Span::unknown()),
"someduration" => Value::duration(row.4, Span::unknown()),
"somedate" => Value::date(row.5, Span::unknown()),
"somestring" => Value::string(row.6, Span::unknown()),
"somebinary" => Value::binary(row.7, Span::unknown()),
"somenull" => Value::nothing(Span::unknown()),
},
Span::unknown(),
)
}
}
impl<'r> TryFrom<&rusqlite::Row<'r>> for TestRow {
type Error = rusqlite::Error;
fn try_from(row: &rusqlite::Row) -> Result<Self, Self::Error> {
let somebool: bool = row.get("somebool").unwrap();
let someint: i64 = row.get("someint").unwrap();
let somefloat: f64 = row.get("somefloat").unwrap();
let somefilesize: i64 = row.get("somefilesize").unwrap();
let someduration: i64 = row.get("someduration").unwrap();
let somedate: DateTime<FixedOffset> = row.get("somedate").unwrap();
let somestring: String = row.get("somestring").unwrap();
let somebinary: Vec<u8> = row.get("somebinary").unwrap();
let somenull: rusqlite::types::Value = row.get("somenull").unwrap();
Ok(TestRow(
somebool,
someint,
somefloat,
somefilesize,
someduration,
somedate,
somestring,
somebinary,
somenull,
))
}
}
impl Distribution<TestRow> for Standard {
fn sample<R>(&self, rng: &mut R) -> TestRow
where
R: rand::Rng + ?Sized,
{
let dt = DateTime::from_timestamp_millis(rng.gen_range(0..2324252554000))
.unwrap()
.fixed_offset();
let rand_string = Alphanumeric.sample_string(rng, 10);
// limit the size of the numbers to work around
// https://github.com/nushell/nushell/issues/10612
let filesize = rng.gen_range(-1024..=1024);
let duration = rng.gen_range(-1024..=1024);
TestRow(
rng.gen(),
rng.gen(),
rng.gen(),
filesize,
duration,
dt,
rand_string,
rng.gen::<u64>().to_be_bytes().to_vec(),
rusqlite::types::Value::Null,
)
}
}
fn make_sqlite_db(dirs: &Dirs, nu_table: &str) -> PathBuf {
let testdir = dirs.test();
let testdb_path =
testdir.join(testdir.file_name().unwrap().to_str().unwrap().to_owned() + ".db");
let testdb = testdb_path.to_str().unwrap();
let nucmd = nu!(
cwd: testdir,
pipeline(&format!("{nu_table} | into sqlite {testdb}"))
);
assert!(nucmd.status.success());
testdb_path
}
fn insert_test_rows(dirs: &Dirs, nu_table: &str, sql_query: Option<&str>, expected: Vec<TestRow>) {
let sql_query = sql_query.unwrap_or("SELECT * FROM main;");
let testdb = make_sqlite_db(dirs, nu_table);
let conn = rusqlite::Connection::open(testdb).unwrap();
let mut stmt = conn.prepare(sql_query).unwrap();
let actual_rows: Vec<_> = stmt
.query_and_then([], |row| TestRow::try_from(row))
.unwrap()
.map(|row| row.unwrap())
.collect();
assert_eq!(expected, actual_rows);
}