Initial --params implementation (#12249)

# Description
This PR adds a `--params` param to `query db`. This closes #11643.

You can't combine named and positional parameters; I think this
might be a limitation of rusqlite itself. I tried using named
parameters with indices like `{ ':named': 123, '1': "positional" }`, but
that always failed with a rusqlite error. On the flip side, the other
way around works: for something like `VALUES (:named, ?)`, you can treat
both as positional: `-p [hello 123]`.

This PR introduces some very gnarly code repetition in
`prepared_statement_to_nu_list`. I tried, I swear; the compiler wasn't
having any of it, it kept telling me to box my closures and then it said
that the reference lifetimes were incompatible in the match arms. I gave
up and put the mapping code in the match itself, but I'm still not
happy.

Another thing I'm unhappy about: I don't like how you have to put the
`:colon` in named parameters. I think nushell should insert it if it's
[missing](https://www.sqlite.org/lang_expr.html#parameters). But this is
the way [rusqlite
works](https://docs.rs/rusqlite/latest/rusqlite/trait.Params.html#example-named),
so for now, I'll keep it consistent. Just know that it's not really a
blocker, and it wouldn't be a breaking change to later make `{ colon: 123
}` work, without the quotes and `:`. This would require allocating and
turning our pretty little `&str` into a `String`, though.

# User-Facing Changes
Less incentive to leave yourself open to SQL injection with statements
like `query db $"INSERT INTO x VALUES \($unsafe_user_input)"`.
Additionally, the `$""` syntax being annoying with parentheses plays in
our favor, making users even more likely to use ? with `--params`.

# Tests + Formatting
Hehe
This commit is contained in:
Doru 2024-03-24 17:40:21 -03:00 committed by GitHub
parent b3721a24fa
commit d1a8992590
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 366 additions and 78 deletions

View file

@ -1,4 +1,4 @@
use crate::database::values::sqlite::open_sqlite_db; use crate::database::values::sqlite::{open_sqlite_db, values_to_sql};
use nu_engine::CallExt; use nu_engine::CallExt;
use nu_protocol::ast::Call; use nu_protocol::ast::Call;
@ -315,42 +315,6 @@ fn insert_value(
} }
} }
// This is taken from to text local_into_string but tweaks it a bit so that certain formatting does not happen
fn value_to_sql(value: Value) -> Result<Box<dyn rusqlite::ToSql>, ShellError> {
Ok(match value {
Value::Bool { val, .. } => Box::new(val),
Value::Int { val, .. } => Box::new(val),
Value::Float { val, .. } => Box::new(val),
Value::Filesize { val, .. } => Box::new(val),
Value::Duration { val, .. } => Box::new(val),
Value::Date { val, .. } => Box::new(val),
Value::String { val, .. } => {
// don't store ansi escape sequences in the database
// escape single quotes
Box::new(nu_utils::strip_ansi_unlikely(&val).into_owned())
}
Value::Binary { val, .. } => Box::new(val),
val => {
return Err(ShellError::OnlySupportsThisInputType {
exp_input_type:
"bool, int, float, filesize, duration, date, string, nothing, binary".into(),
wrong_type: val.get_type().to_string(),
dst_span: Span::unknown(),
src_span: val.span(),
})
}
})
}
fn values_to_sql(
values: impl IntoIterator<Item = Value>,
) -> Result<Vec<Box<dyn rusqlite::ToSql>>, ShellError> {
values
.into_iter()
.map(value_to_sql)
.collect::<Result<Vec<_>, _>>()
}
// Each value stored in an SQLite database (or manipulated by the database engine) has one of the following storage classes: // Each value stored in an SQLite database (or manipulated by the database engine) has one of the following storage classes:
// NULL. The value is a NULL value. // NULL. The value is a NULL value.
// INTEGER. The value is a signed integer, stored in 0, 1, 2, 3, 4, 6, or 8 bytes depending on the magnitude of the value. // INTEGER. The value is a signed integer, stored in 0, 1, 2, 3, 4, 6, or 8 bytes depending on the magnitude of the value.

View file

@ -2,10 +2,12 @@ use nu_engine::CallExt;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Spanned, SyntaxShape, record, Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span,
Type, Spanned, SyntaxShape, Type, Value,
}; };
use crate::database::values::sqlite::nu_value_to_params;
use super::super::SQLiteDatabase; use super::super::SQLiteDatabase;
#[derive(Clone)] #[derive(Clone)]
@ -24,6 +26,13 @@ impl Command for QueryDb {
SyntaxShape::String, SyntaxShape::String,
"SQL to execute against the database.", "SQL to execute against the database.",
) )
.named(
"params",
// TODO: Use SyntaxShape::OneOf with Records and Lists, when Lists no longer break inside OneOf
SyntaxShape::Any,
"List of parameters for the SQL statement",
Some('p'),
)
.category(Category::Database) .category(Category::Database)
} }
@ -32,11 +41,29 @@ impl Command for QueryDb {
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![
description: "Execute SQL against a SQLite database", Example {
example: r#"open foo.db | query db "SELECT * FROM Bar""#, description: "Execute SQL against a SQLite database",
result: None, example: r#"open foo.db | query db "SELECT * FROM Bar""#,
}] result: None,
},
Example {
description: "Execute a SQL statement with parameters",
example: r#"stor create -t my_table -c { first: str, second: int }
stor open | query db "INSERT INTO my_table VALUES (?, ?)" -p [hello 123]"#,
result: None,
},
Example {
description: "Execute a SQL statement with named parameters",
example: r#"stor create -t my_table -c { first: str, second: int }
stor insert -t my_table -d { first: 'hello', second: '123' }
stor open | query db "SELECT * FROM my_table WHERE second = :search_second" -p { search_second: 123 }"#,
result: Some(Value::test_list(vec![Value::test_record(record! {
"first" => Value::test_string("hello"),
"second" => Value::test_int(123)
})])),
},
]
} }
fn search_terms(&self) -> Vec<&str> { fn search_terms(&self) -> Vec<&str> {
@ -51,9 +78,29 @@ impl Command for QueryDb {
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let sql: Spanned<String> = call.req(engine_state, stack, 0)?; let sql: Spanned<String> = call.req(engine_state, stack, 0)?;
let params_value: Value = call
.get_flag(engine_state, stack, "params")?
.unwrap_or_else(|| Value::nothing(Span::unknown()));
let params = nu_value_to_params(params_value)?;
let db = SQLiteDatabase::try_from_pipeline(input, call.head)?; let db = SQLiteDatabase::try_from_pipeline(input, call.head)?;
db.query(&sql, call.head) db.query(&sql, params, call.head)
.map(IntoPipelineData::into_pipeline_data) .map(IntoPipelineData::into_pipeline_data)
} }
} }
#[cfg(test)]
mod test {
    use super::*;
    use crate::{test_examples_with_commands, StorCreate, StorInsert, StorOpen};

    // The `stor` commands are handed to the example harness as extra commands
    // alongside `QueryDb`. The test is ignored for the reason given in the attribute.
    #[ignore = "stor db does not persist changes between pipelines"]
    #[test]
    fn test_examples() {
        test_examples_with_commands(QueryDb {}, &[&StorOpen, &StorCreate, &StorInsert])
    }
}

View file

@ -5,6 +5,7 @@ use super::definitions::{
use nu_protocol::{CustomValue, PipelineData, Record, ShellError, Span, Spanned, Value}; use nu_protocol::{CustomValue, PipelineData, Record, ShellError, Span, Spanned, Value};
use rusqlite::{ use rusqlite::{
types::ValueRef, Connection, DatabaseName, Error as SqliteError, OpenFlags, Row, Statement, types::ValueRef, Connection, DatabaseName, Error as SqliteError, OpenFlags, Row, Statement,
ToSql,
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{ use std::{
@ -99,17 +100,23 @@ impl SQLiteDatabase {
Value::custom_value(db, span) Value::custom_value(db, span)
} }
pub fn query(&self, sql: &Spanned<String>, call_span: Span) -> Result<Value, ShellError> { pub fn query(
&self,
sql: &Spanned<String>,
params: NuSqlParams,
call_span: Span,
) -> Result<Value, ShellError> {
let conn = open_sqlite_db(&self.path, call_span)?; let conn = open_sqlite_db(&self.path, call_span)?;
let stream = let stream = run_sql_query(conn, sql, params, self.ctrlc.clone()).map_err(|e| {
run_sql_query(conn, sql, self.ctrlc.clone()).map_err(|e| ShellError::GenericError { ShellError::GenericError {
error: "Failed to query SQLite database".into(), error: "Failed to query SQLite database".into(),
msg: e.to_string(), msg: e.to_string(),
span: Some(sql.span), span: Some(sql.span),
help: None, help: None,
inner: vec![], inner: vec![],
})?; }
})?;
Ok(stream) Ok(stream)
} }
@ -428,10 +435,100 @@ pub fn open_sqlite_db(path: &Path, call_span: Span) -> Result<Connection, ShellE
fn run_sql_query( fn run_sql_query(
conn: Connection, conn: Connection,
sql: &Spanned<String>, sql: &Spanned<String>,
params: NuSqlParams,
ctrlc: Option<Arc<AtomicBool>>, ctrlc: Option<Arc<AtomicBool>>,
) -> Result<Value, SqliteError> { ) -> Result<Value, SqliteError> {
let stmt = conn.prepare(&sql.item)?; let stmt = conn.prepare(&sql.item)?;
prepared_statement_to_nu_list(stmt, sql.span, ctrlc)
prepared_statement_to_nu_list(stmt, params, sql.span, ctrlc)
}
/// Converts a single nushell [`Value`] into a boxed value that rusqlite can bind as a
/// statement parameter.
///
/// Adapted from `to text`'s `local_into_string`, tweaked so that certain formatting does
/// not happen.
///
/// Bool, int, float, filesize, duration, date and binary values are boxed as-is. Strings
/// have ANSI escape sequences stripped first, and `Nothing` is boxed as `None::<String>`.
///
/// # Errors
/// Returns `ShellError::OnlySupportsThisInputType` for any other kind of value.
pub fn value_to_sql(value: Value) -> Result<Box<dyn rusqlite::ToSql>, ShellError> {
    let boxed: Box<dyn rusqlite::ToSql> = match value {
        Value::Bool { val, .. } => Box::new(val),
        Value::Int { val, .. } => Box::new(val),
        Value::Float { val, .. } => Box::new(val),
        Value::Filesize { val, .. } => Box::new(val),
        Value::Duration { val, .. } => Box::new(val),
        Value::Date { val, .. } => Box::new(val),
        Value::String { val, .. } => {
            // don't store ansi escape sequences in the database
            let without_ansi = nu_utils::strip_ansi_unlikely(&val).into_owned();
            Box::new(without_ansi)
        }
        Value::Binary { val, .. } => Box::new(val),
        Value::Nothing { .. } => Box::new(None::<String>),
        unsupported => {
            return Err(ShellError::OnlySupportsThisInputType {
                exp_input_type:
                    "bool, int, float, filesize, duration, date, string, nothing, binary".into(),
                wrong_type: unsupported.get_type().to_string(),
                dst_span: Span::unknown(),
                src_span: unsupported.span(),
            });
        }
    };

    Ok(boxed)
}
pub fn values_to_sql(
values: impl IntoIterator<Item = Value>,
) -> Result<Vec<Box<dyn rusqlite::ToSql>>, ShellError> {
values
.into_iter()
.map(value_to_sql)
.collect::<Result<Vec<_>, _>>()
}
/// Parameters to bind to a prepared SQL statement, already converted to boxed `ToSql` values.
pub enum NuSqlParams {
/// Positional parameters, in the order they were supplied.
List(Vec<Box<dyn ToSql>>),
/// Named parameters as `(name, value)` pairs.
Named(Vec<(String, Box<dyn ToSql>)>),
}
impl Default for NuSqlParams {
fn default() -> Self {
NuSqlParams::List(Vec::new())
}
}
/// Converts a nushell value into SQL statement parameters.
///
/// * A record becomes `NuSqlParams::Named`: every field value goes through `value_to_sql`, and
///   any field name that does not already start with `:`, `@` or `$` gets a `:` prepended.
/// * A list becomes `NuSqlParams::List`, keeping the element order.
/// * `Nothing` yields `NuSqlParams::default()`, meaning no parameters.
///
/// # Errors
/// Propagates the first error returned by `value_to_sql`. Any other kind of value produces
/// `ShellError::TypeMismatch`.
pub fn nu_value_to_params(value: Value) -> Result<NuSqlParams, ShellError> {
match value {
Value::Record { val, .. } => {
let mut params = Vec::with_capacity(val.len());
for (mut column, value) in val.into_iter() {
let sql_type_erased = value_to_sql(value)?;
// Bare keys such as `name` are promoted to `:name`; keys that already carry one of
// the three prefixes are left untouched.
if !column.starts_with([':', '@', '$']) {
column.insert(0, ':');
}
params.push((column, sql_type_erased));
}
Ok(NuSqlParams::Named(params))
}
Value::List { vals, .. } => {
let mut params = Vec::with_capacity(vals.len());
for value in vals.into_iter() {
let sql_type_erased = value_to_sql(value)?;
params.push(sql_type_erased);
}
Ok(NuSqlParams::List(params))
}
// We accept no parameters
Value::Nothing { .. } => Ok(NuSqlParams::default()),
_ => Err(ShellError::TypeMismatch {
err_message: "Invalid parameters value: expected record or list".to_string(),
span: value.span(),
}),
}
} }
fn read_single_table( fn read_single_table(
@ -440,12 +537,14 @@ fn read_single_table(
call_span: Span, call_span: Span,
ctrlc: Option<Arc<AtomicBool>>, ctrlc: Option<Arc<AtomicBool>>,
) -> Result<Value, SqliteError> { ) -> Result<Value, SqliteError> {
// TODO: Should use params here?
let stmt = conn.prepare(&format!("SELECT * FROM [{table_name}]"))?; let stmt = conn.prepare(&format!("SELECT * FROM [{table_name}]"))?;
prepared_statement_to_nu_list(stmt, call_span, ctrlc) prepared_statement_to_nu_list(stmt, NuSqlParams::default(), call_span, ctrlc)
} }
fn prepared_statement_to_nu_list( fn prepared_statement_to_nu_list(
mut stmt: Statement, mut stmt: Statement,
params: NuSqlParams,
call_span: Span, call_span: Span,
ctrlc: Option<Arc<AtomicBool>>, ctrlc: Option<Arc<AtomicBool>>,
) -> Result<Value, SqliteError> { ) -> Result<Value, SqliteError> {
@ -455,27 +554,68 @@ fn prepared_statement_to_nu_list(
.map(String::from) .map(String::from)
.collect::<Vec<String>>(); .collect::<Vec<String>>();
let row_results = stmt.query_map([], |row| { // I'm very sorry for this repetition
Ok(convert_sqlite_row_to_nu_value( // I tried scoping the match arms to the query_map alone, but lifetime and closure reference escapes
row, // got heavily in the way
call_span, let row_values = match params {
&column_names, NuSqlParams::List(params) => {
)) let refs: Vec<&dyn ToSql> = params.iter().map(|value| (&**value)).collect();
})?;
// we collect all rows before returning them. Not ideal but it's hard/impossible to return a stream from a CustomValue let row_results = stmt.query_map(refs.as_slice(), |row| {
let mut row_values = vec![]; Ok(convert_sqlite_row_to_nu_value(
row,
call_span,
&column_names,
))
})?;
for row_result in row_results { // we collect all rows before returning them. Not ideal but it's hard/impossible to return a stream from a CustomValue
if nu_utils::ctrl_c::was_pressed(&ctrlc) { let mut row_values = vec![];
// return whatever we have so far, let the caller decide whether to use it
return Ok(Value::list(row_values, call_span)); for row_result in row_results {
if nu_utils::ctrl_c::was_pressed(&ctrlc) {
// return whatever we have so far, let the caller decide whether to use it
return Ok(Value::list(row_values, call_span));
}
if let Ok(row_value) = row_result {
row_values.push(row_value);
}
}
row_values
} }
NuSqlParams::Named(pairs) => {
let refs: Vec<_> = pairs
.iter()
.map(|(column, value)| (column.as_str(), &**value))
.collect();
if let Ok(row_value) = row_result { let row_results = stmt.query_map(refs.as_slice(), |row| {
row_values.push(row_value); Ok(convert_sqlite_row_to_nu_value(
row,
call_span,
&column_names,
))
})?;
// we collect all rows before returning them. Not ideal but it's hard/impossible to return a stream from a CustomValue
let mut row_values = vec![];
for row_result in row_results {
if nu_utils::ctrl_c::was_pressed(&ctrlc) {
// return whatever we have so far, let the caller decide whether to use it
return Ok(Value::list(row_values, call_span));
}
if let Ok(row_value) = row_result {
row_values.push(row_value);
}
}
row_values
} }
} };
Ok(Value::list(row_values, call_span)) Ok(Value::list(row_values, call_span))
} }
@ -493,8 +633,14 @@ fn read_entire_sqlite_db(
for row in rows { for row in rows {
let table_name: String = row?; let table_name: String = row?;
// TODO: Should use params here?
let table_stmt = conn.prepare(&format!("select * from [{table_name}]"))?; let table_stmt = conn.prepare(&format!("select * from [{table_name}]"))?;
let rows = prepared_statement_to_nu_list(table_stmt, call_span, ctrlc.clone())?; let rows = prepared_statement_to_nu_list(
table_stmt,
NuSqlParams::default(),
call_span,
ctrlc.clone(),
)?;
tables.push(table_name, rows); tables.push(table_name, rows);
} }

View file

@ -2,8 +2,19 @@
use nu_protocol::engine::Command; use nu_protocol::engine::Command;
#[cfg(test)] #[cfg(test)]
/// Runs the test examples in the passed in command and check their signatures and return values.
///
/// # Panics
/// If you get a ExternalNotSupported panic, you may be using a command
/// that's not in the default working set of the test harness.
/// You may want to use test_examples_with_commands and include any other dependencies.
pub fn test_examples(cmd: impl Command + 'static) { pub fn test_examples(cmd: impl Command + 'static) {
test_examples::test_examples(cmd); test_examples::test_examples(cmd, &[]);
}
#[cfg(test)]
/// Same as `test_examples`, but forwards `commands` to the example harness as extra
/// commands to register in addition to `cmd`.
pub fn test_examples_with_commands(cmd: impl Command + 'static, commands: &[&dyn Command]) {
test_examples::test_examples(cmd, commands);
} }
#[cfg(test)] #[cfg(test)]
@ -27,10 +38,10 @@ mod test_examples {
}; };
use std::collections::HashSet; use std::collections::HashSet;
pub fn test_examples(cmd: impl Command + 'static) { pub fn test_examples(cmd: impl Command + 'static, commands: &[&dyn Command]) {
let examples = cmd.examples(); let examples = cmd.examples();
let signature = cmd.signature(); let signature = cmd.signature();
let mut engine_state = make_engine_state(cmd.clone_box()); let mut engine_state = make_engine_state(cmd.clone_box(), commands);
let cwd = std::env::current_dir().expect("Could not get current working directory."); let cwd = std::env::current_dir().expect("Could not get current working directory.");
@ -40,11 +51,12 @@ mod test_examples {
if example.result.is_none() { if example.result.is_none() {
continue; continue;
} }
witnessed_type_transformations.extend( witnessed_type_transformations.extend(
check_example_input_and_output_types_match_command_signature( check_example_input_and_output_types_match_command_signature(
&example, &example,
&cwd, &cwd,
&mut make_engine_state(cmd.clone_box()), &mut make_engine_state(cmd.clone_box(), commands),
&signature.input_output_types, &signature.input_output_types,
signature.operates_on_cell_paths(), signature.operates_on_cell_paths(),
), ),
@ -58,7 +70,7 @@ mod test_examples {
); );
} }
fn make_engine_state(cmd: Box<dyn Command>) -> Box<EngineState> { fn make_engine_state(cmd: Box<dyn Command>, commands: &[&dyn Command]) -> Box<EngineState> {
let mut engine_state = Box::new(EngineState::new()); let mut engine_state = Box::new(EngineState::new());
let delta = { let delta = {
@ -106,6 +118,12 @@ mod test_examples {
working_set.add_decl(Box::new(Update)); working_set.add_decl(Box::new(Update));
working_set.add_decl(Box::new(Values)); working_set.add_decl(Box::new(Values));
working_set.add_decl(Box::new(Wrap)); working_set.add_decl(Box::new(Wrap));
// Add any extra commands that the test harness needs
for command in commands {
working_set.add_decl(command.clone_box());
}
// Adding the command that is being tested to the working set // Adding the command that is being tested to the working set
working_set.add_decl(cmd); working_set.add_decl(cmd);

View file

@ -37,7 +37,7 @@ pub use debug::*;
pub use default_context::*; pub use default_context::*;
pub use env::*; pub use env::*;
#[cfg(test)] #[cfg(test)]
pub use example_test::test_examples; pub use example_test::{test_examples, test_examples_with_commands};
pub use experimental::*; pub use experimental::*;
pub use filesystem::*; pub use filesystem::*;
pub use filters::*; pub use filters::*;

View file

@ -84,10 +84,7 @@ fn process(
if let Ok(conn) = db.open_connection() { if let Ok(conn) = db.open_connection() {
match columns { match columns {
Some(record) => { Some(record) => {
let mut create_stmt = format!( let mut create_stmt = format!("CREATE TABLE {} ( ", new_table_name);
"CREATE TABLE {} ( id INTEGER NOT NULL PRIMARY KEY, ",
new_table_name
);
for (column_name, column_datatype) in record { for (column_name, column_datatype) in record {
match column_datatype.coerce_str()?.as_ref() { match column_datatype.coerce_str()?.as_ref() {
"int" => { "int" => {

View file

@ -1 +1,2 @@
mod into_sqlite; mod into_sqlite;
mod query_db;

View file

@ -0,0 +1,115 @@
use nu_test_support::{nu, nu_repl_code, playground::Playground};
// Multiple nu! calls don't persist state, so we can't store it in a function
// Instead, each test passes this pipeline as the first entry of its `nu_repl_code` list.
// It creates the `test_db` table (if it does not exist yet) with one column per data type under test.
const DATABASE_INIT: &str = r#"stor open | query db "CREATE TABLE IF NOT EXISTS test_db (
name TEXT,
age INTEGER,
height REAL,
serious BOOLEAN,
created_at DATETIME,
largest_file INTEGER,
time_slept INTEGER,
null_field TEXT,
data BLOB
)""#;
/// Inserts a row using SQL literals only and checks which nushell types come back.
#[test]
fn data_types() {
    Playground::setup("empty", |_, _| {
        // One row that exercises every column of the test table
        let insert_row = r#"stor open
| query db "INSERT INTO test_db VALUES (
'nimurod',
20,
6.0,
true,
date('2024-03-23T00:15:24-03:00'),
72400000,
1000000,
NULL,
x'68656c6c6f'
)"
"#;
        // Read the row back and describe the nushell type of each column
        let describe_row = r#"
stor open | query db "SELECT * FROM test_db" | first | values | each { describe } | str join "-"
"#;

        let results = nu!(nu_repl_code(&[DATABASE_INIT, insert_row, describe_row]));

        // The BOOLEAN column is expected to read back as `int` and the DATETIME column as
        // `string`; see https://www.sqlite.org/datatype3.html for SQLite's storage classes.
        let expected = "string-int-float-int-string-int-int-nothing-binary";
        assert_eq!(results.out, expected);
    });
}
/// Inserts a row through positional `?` placeholders bound with `-p [ ... ]`, then checks the
/// stored values and their nushell types.
#[test]
fn ordered_params() {
    Playground::setup("empty", |_, _| {
        // One nushell value per placeholder, covering every column of the test table
        let insert_row = r#"(stor open
| query db "INSERT INTO test_db VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
-p [ 'nimurod', 20, 6.0, true, ('2024-03-23T00:15:24-03:00' | into datetime), 72.4mb, 1ms, null, ("hello" | into binary) ]
)"#;
        // Emit "<values joined by ->_<types joined by ->" for the row we just wrote
        let read_back = r#"
let values = (stor open | query db "SELECT * FROM test_db" | first | values);
($values | str join '-') + "_" + ($values | each { describe } | str join '-')
"#;

        let results = nu!(nu_repl_code(&[DATABASE_INIT, insert_row, read_back]));

        let expected =
            "nimurod-20-6-1-2024-03-23 00:15:24-03:00-72400000-1000000--[104, 101, 108, 108, 111]_\
            string-int-float-int-string-int-int-nothing-binary";
        assert_eq!(results.out, expected);
    });
}
/// Inserts a row through named placeholders bound with a record, then checks the stored
/// values and their nushell types.
#[test]
fn named_params() {
    Playground::setup("empty", |_, _| {
        // The statement mixes `:name`, `@name` and `$name` placeholders, and the record mixes
        // bare keys with explicitly prefixed ones. Only `:` is added to a bare `key: value`
        // entry, so `@` and `$` parameters have to be spelled out in the key.
        // In practice, users should not use @param or $param
        let insert_row = r#"(stor open
| query db "INSERT INTO test_db VALUES (:name, :age, @height, $serious, :created_at, :largest_file, :time_slept, :null_field, :data)"
-p {
name: 'nimurod',
':age': 20,
'@height': 6.0,
'$serious': true,
created_at: ('2024-03-23T00:15:24-03:00' | into datetime),
largest_file: 72.4mb,
time_slept: 1ms,
null_field: null,
data: ("hello" | into binary)
}
)"#;
        // Emit "<values joined by ->_<types joined by ->" for the row we just wrote
        let read_back = r#"
let values = (stor open | query db "SELECT * FROM test_db" | first | values);
($values | str join '-') + "_" + ($values | each { describe } | str join '-')
"#;

        let results = nu!(nu_repl_code(&[DATABASE_INIT, insert_row, read_back]));

        let expected =
            "nimurod-20-6-1-2024-03-23 00:15:24-03:00-72400000-1000000--[104, 101, 108, 108, 111]_\
            string-int-float-int-string-int-int-nothing-binary";
        assert_eq!(results.out, expected);
    });
}