diff --git a/Cargo.lock b/Cargo.lock index e1493eb4..4a265a5f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,6 +31,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anyhow" version = "1.0.58" @@ -409,6 +415,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.0.73" @@ -433,6 +445,33 @@ dependencies = [ "winapi", ] +[[package]] +name = "ciborium" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369" + +[[package]] +name = "ciborium-ll" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "3.2.10" @@ -572,6 +611,44 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d0165d2900ae6778e36e80bbc4da3b5eefccee9ba939761f9c2882a5d9af3ff" +[[package]] +name = "criterion" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" +dependencies = [ + "anes", + "atty", + "cast", + "ciborium", + "clap", + "criterion-plot", + "futures", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "tokio", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + [[package]] name = "crossbeam-channel" version = "0.5.5" @@ -1060,6 +1137,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + [[package]] name = "hashbrown" version = "0.12.2" @@ -1487,6 +1570,12 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "openssl" version = "0.10.41" @@ -1658,6 +1747,34 @@ version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" +[[package]] +name = "plotters" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2538b639e642295546c50fcd545198c9d64ee2a38620a628724a3b266d5fbf97" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" + +[[package]] +name = "plotters-svg" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f" +dependencies = [ + "plotters-backend", +] + [[package]] name = "polling" version = "2.2.0" @@ -2077,6 +2194,15 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.20" @@ -2283,6 +2409,7 @@ version = "0.7.0-alpha.2" dependencies = [ "anyhow", "async-std", + "criterion", "dotenvy", "env_logger", "futures", @@ -2693,6 +2820,16 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -2911,6 +3048,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca" +[[package]] +name = "walkdir" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index fd699de4..ba692850 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -175,6 +175,8 @@ rand = "0.8.4" rand_xoshiro = "0.6.0" hex = "0.4.3" tempdir = "0.3.7" +criterion = {version = "0.4", features = ["async_tokio"]} + # Needed to test SQLCipher libsqlite3-sys = { version = "0.25.1", features = ["bundled-sqlcipher"] } @@ -250,6 +252,12 @@ name = "sqlite-migrate" path = "tests/sqlite/migrate.rs" required-features = ["sqlite", "macros", "migrate"] +[[bench]] +name = "sqlite-describe" +path = "benches/sqlite/describe.rs" +harness = false +required-features = ["sqlite"] + # # MySQL # @@ -332,3 +340,4 @@ required-features = ["postgres", "macros", "migrate"] name = "postgres-migrate" path = "tests/postgres/migrate.rs" required-features = ["postgres", "macros", "migrate"] + diff --git a/benches/sqlite/describe.rs b/benches/sqlite/describe.rs new file mode 100644 index 00000000..470c1f73 --- /dev/null +++ b/benches/sqlite/describe.rs @@ -0,0 +1,141 @@ +use criterion::BenchmarkId; +use criterion::Criterion; +use criterion::{criterion_group, criterion_main}; + +use sqlx::sqlite::{Sqlite, SqliteConnection}; +use sqlx::{Connection, Executor}; +use sqlx_test::new; + +// Here we have an async function to benchmark +async fn do_describe_trivial(db: &std::cell::RefCell) { + db.borrow_mut().describe("select 1").await.unwrap(); +} + +async fn do_describe_recursive(db: &std::cell::RefCell) { + db.borrow_mut() + .describe( + r#" + WITH RECURSIVE schedule(begin_date) AS MATERIALIZED ( + SELECT datetime('2022-10-01') + WHERE datetime('2022-10-01') < datetime('2022-11-03') + UNION ALL + SELECT datetime(begin_date,'+1 day') + FROM schedule + WHERE datetime(begin_date) < datetime(?2) + ) + SELECT + begin_date + FROM schedule + GROUP BY begin_date + "#, + ) + .await + .unwrap(); +} + +async fn do_describe_insert(db: &std::cell::RefCell) { + db.borrow_mut() + .describe("INSERT INTO tweet (id, text) VALUES (2, 'Hello') RETURNING *") + .await + .unwrap(); +} + +async fn do_describe_insert_fks(db: &std::cell::RefCell) { + db.borrow_mut() + .describe("insert into statements (text) values ('a') returning id") + .await + .unwrap(); +} + +async fn init_connection() -> SqliteConnection { + let mut conn = new::().await.unwrap(); + + conn.execute( + r#" + CREATE TEMPORARY TABLE statements ( + id integer not null primary key, + text text not null + ); + + CREATE TEMPORARY TABLE votes1 (statement_id integer not null references statements(id)); + CREATE TEMPORARY TABLE votes2 (statement_id integer not null references statements(id)); + CREATE TEMPORARY TABLE votes3 (statement_id integer not null references statements(id)); + CREATE TEMPORARY TABLE votes4 (statement_id integer not null references statements(id)); + CREATE TEMPORARY TABLE votes5 (statement_id integer not null references statements(id)); + CREATE TEMPORARY TABLE votes6 (statement_id integer not null references statements(id)); + --CREATE TEMPORARY TABLE votes7 (statement_id integer not null references statements(id)); + --CREATE TEMPORARY TABLE votes8 (statement_id integer not null references statements(id)); + --CREATE TEMPORARY TABLE votes9 (statement_id integer not null references statements(id)); + --CREATE TEMPORARY TABLE votes10 (statement_id integer not null references statements(id)); + --CREATE TEMPORARY TABLE votes11 (statement_id integer not null references statements(id)); + "#, + ) + .await + .unwrap(); + conn +} + +fn describe_trivial(c: &mut Criterion) { + let runtime = tokio::runtime::Runtime::new().unwrap(); + let db = std::cell::RefCell::new(runtime.block_on(init_connection())); + + c.bench_with_input( + BenchmarkId::new("select", "trivial"), + &db, + move |b, db_ref| { + // Insert a call to `to_async` to convert the bencher to async mode. + // The timing loops are the same as with the normal bencher. + b.to_async(&runtime).iter(|| do_describe_trivial(db_ref)); + }, + ); +} + +fn describe_recursive(c: &mut Criterion) { + let runtime = tokio::runtime::Runtime::new().unwrap(); + let db = std::cell::RefCell::new(runtime.block_on(init_connection())); + + c.bench_with_input( + BenchmarkId::new("select", "recursive"), + &db, + move |b, db_ref| { + // Insert a call to `to_async` to convert the bencher to async mode. + // The timing loops are the same as with the normal bencher. + b.to_async(&runtime).iter(|| do_describe_recursive(db_ref)); + }, + ); +} + +fn describe_insert(c: &mut Criterion) { + let runtime = tokio::runtime::Runtime::new().unwrap(); + let db = std::cell::RefCell::new(runtime.block_on(init_connection())); + + c.bench_with_input( + BenchmarkId::new("insert", "returning"), + &db, + move |b, db_ref| { + // Insert a call to `to_async` to convert the bencher to async mode. + // The timing loops are the same as with the normal bencher. + b.to_async(&runtime).iter(|| do_describe_insert(db_ref)); + }, + ); +} + +fn describe_insert_fks(c: &mut Criterion) { + let runtime = tokio::runtime::Runtime::new().unwrap(); + let db = std::cell::RefCell::new(runtime.block_on(init_connection())); + + c.bench_with_input(BenchmarkId::new("insert", "fks"), &db, move |b, db_ref| { + // Insert a call to `to_async` to convert the bencher to async mode. + // The timing loops are the same as with the normal bencher. + b.to_async(&runtime).iter(|| do_describe_insert_fks(db_ref)); + }); +} + +criterion_group!( + benches, + describe_trivial, + describe_recursive, + describe_insert, + describe_insert_fks +); +criterion_main!(benches); diff --git a/sqlx-sqlite/src/connection/explain.rs b/sqlx-sqlite/src/connection/explain.rs index 59a63909..45be0feb 100644 --- a/sqlx-sqlite/src/connection/explain.rs +++ b/sqlx-sqlite/src/connection/explain.rs @@ -1,3 +1,4 @@ +use crate::connection::intmap::IntMap; use crate::connection::{execute, ConnectionState}; use crate::error::Error; use crate::from_row::FromRow; @@ -136,7 +137,7 @@ enum ColumnType { datatype: DataType, nullable: Option, }, - Record(Vec), + Record(IntMap), } impl Default for ColumnType { @@ -199,60 +200,37 @@ impl RegDataType { } } -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq, Hash)] enum CursorDataType { Normal { - cols: HashMap, + cols: IntMap, + is_empty: Option, }, Pseudo(i64), } impl CursorDataType { - fn from_sparse_record(record: &HashMap, is_empty: Option) -> Self { + fn from_intmap(record: &IntMap, is_empty: Option) -> Self { Self::Normal { - cols: record - .iter() - .map(|(colnum, datatype)| (*colnum, datatype.clone())) - .collect(), + cols: record.clone(), is_empty, } } fn from_dense_record(record: &Vec, is_empty: Option) -> Self { Self::Normal { - cols: (0..).zip(record.iter().cloned()).collect(), + cols: IntMap::from_dense_record(record), is_empty, } } - fn map_to_dense_record(&self, registers: &HashMap) -> Vec { - match self { - Self::Normal { cols, .. } => { - let mut rowdata = vec![ColumnType::default(); cols.len()]; - for (idx, col) in cols.iter() { - rowdata[*idx as usize] = col.clone(); - } - rowdata - } - Self::Pseudo(i) => match registers.get(i) { - Some(RegDataType::Single(ColumnType::Record(r))) => r.clone(), - _ => Vec::new(), - }, - } - } - - fn map_to_sparse_record( - &self, - registers: &HashMap, - ) -> HashMap { + fn map_to_intmap(&self, registers: &IntMap) -> IntMap { match self { Self::Normal { cols, .. } => cols.clone(), Self::Pseudo(i) => match registers.get(i) { - Some(RegDataType::Single(ColumnType::Record(r))) => { - (0..).zip(r.iter().cloned()).collect() - } - _ => HashMap::new(), + Some(RegDataType::Single(ColumnType::Record(r))) => r.clone(), + _ => IntMap::new(), }, } } @@ -292,7 +270,7 @@ fn opcode_to_type(op: &str) -> DataType { fn root_block_columns( conn: &mut ConnectionState, -) -> Result>, Error> { +) -> Result>, Error> { let table_block_columns: Vec<(i64, i64, i64, String, bool)> = execute::iter( conn, "SELECT s.dbnum, s.rootpage, col.cid as colnum, col.type, col.\"notnull\" @@ -319,7 +297,7 @@ fn root_block_columns( .map(|row| FromRow::from_row(&row?)) .collect::, Error>>()?; - let mut row_info: HashMap<(i64, i64), HashMap> = HashMap::new(); + let mut row_info: HashMap<(i64, i64), IntMap> = HashMap::new(); for (dbnum, block, colnum, datatype, notnull) in table_block_columns { let row_info = row_info.entry((dbnum, block)).or_default(); row_info.insert( @@ -340,66 +318,43 @@ struct QueryState { pub visited: Vec, // A log of the order of execution of each instruction pub history: Vec, - // Registers - pub r: HashMap, - // Rows that pointers point to - pub p: HashMap, - // Next instruction to execute - pub program_i: usize, + // State of the virtual machine + pub mem: MemoryState, // Results published by the execution pub result: Option, Option)>>, } -#[derive(Debug, Hash, PartialEq, Eq)] -struct BranchStateHash { - instruction: usize, - //register index, data type - registers: Vec<(i64, RegDataType)>, - //cursor index, is_empty, pseudo register index - cursor_metadata: Vec<(i64, Option, Option)>, - //cursor index, column index, data type - cursors: Vec<(i64, i64, Option)>, +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct MemoryState { + // Next instruction to execute + pub program_i: usize, + // Registers + pub r: IntMap, + // Rows that pointers point to + pub p: IntMap, } -impl BranchStateHash { - pub fn from_query_state(st: &QueryState) -> Self { - let mut reg = vec![]; - for (k, v) in &st.r { - reg.push((*k, v.clone())); - } - reg.sort_by_key(|v| v.0); +struct BranchList { + states: Vec, + visited_branch_state: HashSet, +} - let mut cur = vec![]; - let mut cur_meta = vec![]; - for (k, v) in &st.p { - match v { - CursorDataType::Normal { cols, is_empty } => { - cur_meta.push((*k, *is_empty, None)); - for (i, col) in cols { - cur.push((*k, *i, Some(col.clone()))); - } - } - CursorDataType::Pseudo(i) => { - cur_meta.push((*k, None, Some(*i))); - //don't bother copying columns, they are in register i - } - } - } - cur_meta.sort_by(|a, b| a.0.cmp(&b.0)); - cur.sort_by(|a, b| { - if a.0 == b.0 { - a.1.cmp(&b.1) - } else { - a.0.cmp(&b.0) - } - }); +impl BranchList { + pub fn new(state: QueryState) -> Self { Self { - instruction: st.program_i, - registers: reg, - cursor_metadata: cur_meta, - cursors: cur, + states: vec![state], + visited_branch_state: HashSet::new(), } } + pub fn push(&mut self, state: QueryState) { + if !self.visited_branch_state.contains(&state.mem) { + self.visited_branch_state.insert(state.mem.clone()); + self.states.push(state); + } + } + pub fn pop(&mut self) -> Option { + self.states.pop() + } } // Opcode Reference: https://sqlite.org/opcode.html @@ -418,24 +373,24 @@ pub(super) fn explain( let mut logger = crate::logger::QueryPlanLogger::new(query, &program, conn.log_settings.clone()); - let mut states = vec![QueryState { + let mut states = BranchList::new(QueryState { visited: vec![0; program_size], history: Vec::new(), - r: HashMap::with_capacity(6), - p: HashMap::with_capacity(6), - program_i: 0, result: None, - }]; - - let mut visited_branch_state: HashSet = HashSet::new(); + mem: MemoryState { + program_i: 0, + r: IntMap::new(), + p: IntMap::new(), + }, + }); let mut gas = MAX_TOTAL_INSTRUCTION_COUNT; let mut result_states = Vec::new(); while let Some(mut state) = states.pop() { - while state.program_i < program_size { - let (_, ref opcode, p1, p2, p3, ref p4) = program[state.program_i]; - state.history.push(state.program_i); + while state.mem.program_i < program_size { + let (_, ref opcode, p1, p2, p3, ref p4) = program[state.mem.program_i]; + state.history.push(state.mem.program_i); //limit the number of 'instructions' that can be evaluated if gas > 0 { @@ -444,7 +399,7 @@ pub(super) fn explain( break; } - if state.visited[state.program_i] > MAX_LOOP_COUNT { + if state.visited[state.mem.program_i] > MAX_LOOP_COUNT { if logger.log_enabled() { let program_history: Vec<&(i64, String, i64, i64, i64, Vec)> = state.history.iter().map(|i| &program[*i]).collect(); @@ -455,32 +410,42 @@ pub(super) fn explain( break; } - state.visited[state.program_i] += 1; + state.visited[state.mem.program_i] += 1; match &**opcode { OP_INIT => { // start at - state.program_i = p2 as usize; + state.mem.program_i = p2 as usize; continue; } OP_GOTO => { // goto - state.program_i = p2 as usize; + state.mem.program_i = p2 as usize; continue; } OP_GO_SUB => { // store current instruction in r[p1], goto - state.r.insert(p1, RegDataType::Int(state.program_i as i64)); - state.program_i = p2 as usize; + state + .mem + .r + .insert(p1, RegDataType::Int(state.mem.program_i as i64)); + state.mem.program_i = p2 as usize; continue; } - OP_DECR_JUMP_ZERO | OP_ELSE_EQ | OP_EQ | OP_FILTER | OP_FK_IF_ZERO | OP_FOUND - | OP_GE | OP_GT | OP_IDX_GE | OP_IDX_GT | OP_IDX_LE | OP_IDX_LT | OP_IF_NO_HOPE - | OP_IF_NOT | OP_IF_NOT_OPEN | OP_IF_NOT_ZERO | OP_IF_NULL_ROW | OP_IF_SMALLER + OP_FK_IF_ZERO => { + // goto if no constraints are unsatisfied (assumed to be true) + + state.mem.program_i = p2 as usize; + continue; + } + + OP_DECR_JUMP_ZERO | OP_ELSE_EQ | OP_EQ | OP_FILTER | OP_FOUND | OP_GE | OP_GT + | OP_IDX_GE | OP_IDX_GT | OP_IDX_LE | OP_IDX_LT | OP_IF_NO_HOPE | OP_IF_NOT + | OP_IF_NOT_OPEN | OP_IF_NOT_ZERO | OP_IF_NULL_ROW | OP_IF_SMALLER | OP_INCR_VACUUM | OP_IS_NULL | OP_IS_NULL_OR_TYPE | OP_LE | OP_LT | OP_NE | OP_NEXT | OP_NO_CONFLICT | OP_NOT_EXISTS | OP_ONCE | OP_PREV | OP_PROGRAM | OP_ROW_SET_READ | OP_ROW_SET_TEST | OP_SEEK_GE | OP_SEEK_GT | OP_SEEK_LE @@ -489,50 +454,42 @@ pub(super) fn explain( // goto or next instruction (depending on actual values) let mut branch_state = state.clone(); - branch_state.program_i = p2 as usize; + branch_state.mem.program_i = p2 as usize; + states.push(branch_state); - let bs_hash = BranchStateHash::from_query_state(&branch_state); - if !visited_branch_state.contains(&bs_hash) { - visited_branch_state.insert(bs_hash); - states.push(branch_state); - } - - state.program_i += 1; + state.mem.program_i += 1; continue; } OP_NOT_NULL => { // goto or next instruction (depending on actual values) - let might_branch = match state.r.get(&p1) { + let might_branch = match state.mem.r.get(&p1) { Some(r_p1) => !matches!(r_p1.map_to_datatype(), DataType::Null), _ => false, }; - let might_not_branch = match state.r.get(&p1) { + let might_not_branch = match state.mem.r.get(&p1) { Some(r_p1) => !matches!(r_p1.map_to_nullable(), Some(false)), _ => false, }; if might_branch { let mut branch_state = state.clone(); - branch_state.program_i = p2 as usize; + branch_state.mem.program_i = p2 as usize; if let Some(RegDataType::Single(ColumnType::Single { nullable, .. })) = - branch_state.r.get_mut(&p1) + branch_state.mem.r.get_mut(&p1) { *nullable = Some(false); } - let bs_hash = BranchStateHash::from_query_state(&branch_state); - if !visited_branch_state.contains(&bs_hash) { - visited_branch_state.insert(bs_hash); - states.push(branch_state); - } + states.push(branch_state); } if might_not_branch { - state.program_i += 1; + state.mem.program_i += 1; state + .mem .r .insert(p1, RegDataType::Single(ColumnType::default())); continue; @@ -548,50 +505,41 @@ pub(super) fn explain( //don't bother checking actual types, just don't branch to instruction 0 if p2 != 0 { let mut branch_state = state.clone(); - branch_state.program_i = p2 as usize; - - let bs_hash = BranchStateHash::from_query_state(&branch_state); - if !visited_branch_state.contains(&bs_hash) { - visited_branch_state.insert(bs_hash); - states.push(branch_state); - } + branch_state.mem.program_i = p2 as usize; + states.push(branch_state); } - state.program_i += 1; + state.mem.program_i += 1; continue; } OP_IF => { // goto if r[p1] is true (1) or r[p1] is null and p3 is nonzero - let might_branch = match state.r.get(&p1) { + let might_branch = match state.mem.r.get(&p1) { Some(RegDataType::Int(r_p1)) => *r_p1 != 0, _ => true, }; - let might_not_branch = match state.r.get(&p1) { + let might_not_branch = match state.mem.r.get(&p1) { Some(RegDataType::Int(r_p1)) => *r_p1 == 0, _ => true, }; if might_branch { let mut branch_state = state.clone(); - branch_state.program_i = p2 as usize; + branch_state.mem.program_i = p2 as usize; if p3 == 0 { - branch_state.r.insert(p1, RegDataType::Int(1)); + branch_state.mem.r.insert(p1, RegDataType::Int(1)); } - let bs_hash = BranchStateHash::from_query_state(&branch_state); - if !visited_branch_state.contains(&bs_hash) { - visited_branch_state.insert(bs_hash); - states.push(branch_state); - } + states.push(branch_state); } if might_not_branch { - state.program_i += 1; + state.mem.program_i += 1; if p3 == 0 { - state.r.insert(p1, RegDataType::Int(0)); + state.mem.r.insert(p1, RegDataType::Int(0)); } continue; } else { @@ -604,34 +552,34 @@ pub(super) fn explain( // as a workaround for large offset clauses, both branches will be attempted after 1 loop - let might_branch = match state.r.get(&p1) { + let might_branch = match state.mem.r.get(&p1) { Some(RegDataType::Int(r_p1)) => *r_p1 >= 1, _ => true, }; - let might_not_branch = match state.r.get(&p1) { + let might_not_branch = match state.mem.r.get(&p1) { Some(RegDataType::Int(r_p1)) => *r_p1 < 1, _ => true, }; - let loop_detected = state.visited[state.program_i] > 1; + let loop_detected = state.visited[state.mem.program_i] > 1; if might_branch || loop_detected { let mut branch_state = state.clone(); - branch_state.program_i = p2 as usize; - if let Some(RegDataType::Int(r_p1)) = branch_state.r.get_mut(&p1) { + branch_state.mem.program_i = p2 as usize; + if let Some(RegDataType::Int(r_p1)) = branch_state.mem.r.get_mut(&p1) { *r_p1 -= 1; } states.push(branch_state); } if might_not_branch { - state.program_i += 1; + state.mem.program_i += 1; continue; } else if loop_detected { - state.program_i += 1; - if matches!(state.r.get_mut(&p1), Some(RegDataType::Int(..))) { + state.mem.program_i += 1; + if matches!(state.mem.r.get_mut(&p1), Some(RegDataType::Int(..))) { //forget the exact value, in case some later cares - state.r.insert( + state.mem.r.insert( p1, RegDataType::Single(ColumnType::Single { datatype: DataType::Int64, @@ -649,19 +597,19 @@ pub(super) fn explain( // goto if cursor p1 is empty and p2 != 0, else next instruction if p2 == 0 { - state.program_i += 1; + state.mem.program_i += 1; continue; } - if let Some(cursor) = state.p.get(&p1) { + if let Some(cursor) = state.mem.p.get(&p1) { if matches!(cursor.is_empty(), None | Some(true)) { //only take this branch if the cursor is empty let mut branch_state = state.clone(); - branch_state.program_i = p2 as usize; + branch_state.mem.program_i = p2 as usize; if let Some(CursorDataType::Normal { is_empty, .. }) = - branch_state.p.get_mut(&p1) + branch_state.mem.p.get_mut(&p1) { *is_empty = Some(true); } @@ -670,7 +618,7 @@ pub(super) fn explain( if matches!(cursor.is_empty(), None | Some(false)) { //only take this branch if the cursor is non-empty - state.program_i += 1; + state.mem.program_i += 1; continue; } else { break; @@ -689,12 +637,12 @@ pub(super) fn explain( OP_INIT_COROUTINE => { // goto or next instruction (depending on actual values) - state.r.insert(p1, RegDataType::Int(p3)); + state.mem.r.insert(p1, RegDataType::Int(p3)); if p2 != 0 { - state.program_i = p2 as usize; + state.mem.program_i = p2 as usize; } else { - state.program_i += 1; + state.mem.program_i += 1; } continue; } @@ -702,13 +650,13 @@ pub(super) fn explain( OP_END_COROUTINE => { // jump to p2 of the yield instruction pointed at by register p1 - if let Some(RegDataType::Int(yield_i)) = state.r.get(&p1) { + if let Some(RegDataType::Int(yield_i)) = state.mem.r.get(&p1) { if let Some((_, yield_op, _, yield_p2, _, _)) = program.get(*yield_i as usize) { if OP_YIELD == yield_op.as_str() { - state.program_i = (*yield_p2) as usize; - state.r.remove(&p1); + state.mem.program_i = (*yield_p2) as usize; + state.mem.r.remove(&p1); continue; } else { if logger.log_enabled() { @@ -746,9 +694,9 @@ pub(super) fn explain( OP_RETURN => { // jump to the instruction after the instruction pointed at by register p1 - if let Some(RegDataType::Int(return_i)) = state.r.get(&p1) { - state.program_i = (*return_i + 1) as usize; - state.r.remove(&p1); + if let Some(RegDataType::Int(return_i)) = state.mem.r.get(&p1) { + state.mem.program_i = (*return_i + 1) as usize; + state.mem.r.remove(&p1); continue; } else { if logger.log_enabled() { @@ -763,8 +711,8 @@ pub(super) fn explain( OP_YIELD => { // jump to p2 of the yield instruction pointed at by register p1, store prior instruction in p1 - if let Some(RegDataType::Int(yield_i)) = state.r.get_mut(&p1) { - let program_i: usize = state.program_i; + if let Some(RegDataType::Int(yield_i)) = state.mem.r.get_mut(&p1) { + let program_i: usize = state.mem.program_i; //if yielding to a yield operation, go to the NEXT instruction after that instruction if program @@ -772,11 +720,11 @@ pub(super) fn explain( .map(|(_, yield_op, _, _, _, _)| yield_op.as_str()) == Some(OP_YIELD) { - state.program_i = (*yield_i + 1) as usize; + state.mem.program_i = (*yield_i + 1) as usize; *yield_i = program_i as i64; continue; } else { - state.program_i = *yield_i as usize; + state.mem.program_i = *yield_i as usize; *yield_i = program_i as i64; continue; } @@ -794,44 +742,35 @@ pub(super) fn explain( // goto one of , , or based on the result of a prior compare let mut branch_state = state.clone(); - branch_state.program_i = p1 as usize; - let bs_hash = BranchStateHash::from_query_state(&branch_state); - if !visited_branch_state.contains(&bs_hash) { - visited_branch_state.insert(bs_hash); - states.push(branch_state); - } + branch_state.mem.program_i = p1 as usize; + states.push(branch_state); let mut branch_state = state.clone(); - branch_state.program_i = p2 as usize; - let bs_hash = BranchStateHash::from_query_state(&branch_state); - if !visited_branch_state.contains(&bs_hash) { - visited_branch_state.insert(bs_hash); - states.push(branch_state); - } + branch_state.mem.program_i = p2 as usize; + states.push(branch_state); let mut branch_state = state.clone(); - branch_state.program_i = p3 as usize; - let bs_hash = BranchStateHash::from_query_state(&branch_state); - if !visited_branch_state.contains(&bs_hash) { - visited_branch_state.insert(bs_hash); - states.push(branch_state); - } + branch_state.mem.program_i = p3 as usize; + states.push(branch_state); } OP_COLUMN => { //Get the row stored at p1, or NULL; get the column stored at p2, or NULL - if let Some(record) = state.p.get(&p1).map(|c| c.map_to_sparse_record(&state.r)) + if let Some(record) = + state.mem.p.get(&p1).map(|c| c.map_to_intmap(&state.mem.r)) { if let Some(col) = record.get(&p2) { // insert into p3 the datatype of the col - state.r.insert(p3, RegDataType::Single(col.clone())); + state.mem.r.insert(p3, RegDataType::Single(col.clone())); } else { state + .mem .r .insert(p3, RegDataType::Single(ColumnType::default())); } } else { state + .mem .r .insert(p3, RegDataType::Single(ColumnType::default())); } @@ -841,7 +780,7 @@ pub(super) fn explain( //Copy sequence number from cursor p1 to register p2, increment cursor p1 sequence number //Cursor emulation doesn't sequence value, but it is an int - state.r.insert( + state.mem.r.insert( p2, RegDataType::Single(ColumnType::Single { datatype: DataType::Int64, @@ -852,15 +791,17 @@ pub(super) fn explain( OP_ROW_DATA | OP_SORTER_DATA => { //Get entire row from cursor p1, store it into register p2 - if let Some(record) = state.p.get(&p1) { - let rowdata = record.map_to_dense_record(&state.r); + if let Some(record) = state.mem.p.get(&p1) { + let rowdata = record.map_to_intmap(&state.mem.r); state + .mem .r .insert(p2, RegDataType::Single(ColumnType::Record(rowdata))); } else { state + .mem .r - .insert(p2, RegDataType::Single(ColumnType::Record(Vec::new()))); + .insert(p2, RegDataType::Single(ColumnType::Record(IntMap::new()))); } } @@ -870,25 +811,28 @@ pub(super) fn explain( for reg in p1..p1 + p2 { record.push( state + .mem .r .get(®) .map(|d| d.clone().map_to_columntype()) .unwrap_or(ColumnType::default()), ); } - state - .r - .insert(p3, RegDataType::Single(ColumnType::Record(record))); + state.mem.r.insert( + p3, + RegDataType::Single(ColumnType::Record(IntMap::from_dense_record(&record))), + ); } OP_INSERT | OP_IDX_INSERT | OP_SORTER_INSERT => { - if let Some(RegDataType::Single(ColumnType::Record(record))) = state.r.get(&p2) + if let Some(RegDataType::Single(ColumnType::Record(record))) = + state.mem.r.get(&p2) { if let Some(CursorDataType::Normal { cols, is_empty }) = - state.p.get_mut(&p1) + state.mem.p.get_mut(&p1) { // Insert the record into wherever pointer p1 is - *cols = (0..).zip(record.iter().cloned()).collect(); + *cols = record.clone(); *is_empty = Some(false); } } @@ -897,7 +841,8 @@ pub(super) fn explain( OP_DELETE => { // delete a record from cursor p1 - if let Some(CursorDataType::Normal { is_empty, .. }) = state.p.get_mut(&p1) { + if let Some(CursorDataType::Normal { is_empty, .. }) = state.mem.p.get_mut(&p1) + { if *is_empty == Some(false) { *is_empty = None; //the cursor might be empty now } @@ -906,7 +851,7 @@ pub(super) fn explain( OP_OPEN_PSEUDO => { // Create a cursor p1 aliasing the record from register p2 - state.p.insert(p1, CursorDataType::Pseudo(p2)); + state.mem.p.insert(p1, CursorDataType::Pseudo(p2)); } OP_OPEN_READ | OP_OPEN_WRITE => { @@ -914,22 +859,23 @@ pub(super) fn explain( if p3 == 0 || p3 == 1 { if let Some(columns) = root_block_cols.get(&(p3, p2)) { state + .mem .p - .insert(p1, CursorDataType::from_sparse_record(columns, None)); + .insert(p1, CursorDataType::from_intmap(columns, None)); } else { - state.p.insert( + state.mem.p.insert( p1, CursorDataType::Normal { - cols: HashMap::with_capacity(6), + cols: IntMap::new(), is_empty: None, }, ); } } else { - state.p.insert( + state.mem.p.insert( p1, CursorDataType::Normal { - cols: HashMap::with_capacity(6), + cols: IntMap::new(), is_empty: None, }, ); @@ -938,7 +884,7 @@ pub(super) fn explain( OP_OPEN_EPHEMERAL | OP_OPEN_AUTOINDEX | OP_SORTER_OPEN => { //Create a new pointer which is referenced by p1 - state.p.insert( + state.mem.p.insert( p1, CursorDataType::from_dense_record( &vec![ColumnType::null(); p2 as usize], @@ -949,14 +895,17 @@ pub(super) fn explain( OP_VARIABLE => { // r[p2] = - state.r.insert(p2, RegDataType::Single(ColumnType::null())); + state + .mem + .r + .insert(p2, RegDataType::Single(ColumnType::null())); } // if there is a value in p3, and the query passes, then // we know that it is not nullable OP_HALT_IF_NULL => { if let Some(RegDataType::Single(ColumnType::Single { nullable, .. })) = - state.r.get_mut(&p3) + state.mem.r.get_mut(&p3) { *nullable = Some(false); } @@ -967,7 +916,7 @@ pub(super) fn explain( match from_utf8(p4).map_err(Error::protocol)? { "last_insert_rowid(0)" => { // last_insert_rowid() -> INTEGER - state.r.insert( + state.mem.r.insert( p3, RegDataType::Single(ColumnType::Single { datatype: DataType::Int64, @@ -977,7 +926,7 @@ pub(super) fn explain( } "date(-1)" | "time(-1)" | "datetime(-1)" | "strftime(-1)" => { // date|time|datetime|strftime(...) -> TEXT - state.r.insert( + state.mem.r.insert( p3, RegDataType::Single(ColumnType::Single { datatype: DataType::Text, @@ -987,7 +936,7 @@ pub(super) fn explain( } "julianday(-1)" => { // julianday(...) -> REAL - state.r.insert( + state.mem.r.insert( p3, RegDataType::Single(ColumnType::Single { datatype: DataType::Float, @@ -997,7 +946,7 @@ pub(super) fn explain( } "unixepoch(-1)" => { // unixepoch(p2...) -> INTEGER - state.r.insert( + state.mem.r.insert( p3, RegDataType::Single(ColumnType::Single { datatype: DataType::Int64, @@ -1006,13 +955,14 @@ pub(super) fn explain( ); } - _ => logger.add_unknown_operation(&program[state.program_i]), + _ => logger.add_unknown_operation(&program[state.mem.program_i]), } } OP_NULL_ROW => { // all columns in cursor X are potentially nullable - if let Some(CursorDataType::Normal { ref mut cols, .. }) = state.p.get_mut(&p1) + if let Some(CursorDataType::Normal { ref mut cols, .. }) = + state.mem.p.get_mut(&p1) { for col in cols.values_mut() { if let ColumnType::Single { @@ -1037,7 +987,7 @@ pub(super) fn explain( || p4.starts_with("ntile(") { // count(_) -> INTEGER - state.r.insert( + state.mem.r.insert( p3, RegDataType::Single(ColumnType::Single { datatype: DataType::Int64, @@ -1045,7 +995,7 @@ pub(super) fn explain( }), ); } else if p4.starts_with("sum(") { - if let Some(r_p2) = state.r.get(&p2) { + if let Some(r_p2) = state.mem.r.get(&p2) { let datatype = match r_p2.map_to_datatype() { DataType::Int64 => DataType::Int64, DataType::Int => DataType::Int, @@ -1053,14 +1003,14 @@ pub(super) fn explain( _ => DataType::Float, }; let nullable = r_p2.map_to_nullable(); - state.r.insert( + state.mem.r.insert( p3, RegDataType::Single(ColumnType::Single { datatype, nullable }), ); } - } else if let Some(v) = state.r.get(&p2).cloned() { + } else if let Some(v) = state.mem.r.get(&p2).cloned() { // r[p3] = AGG ( r[p2] ) - state.r.insert(p3, v); + state.mem.r.insert(p3, v); } } @@ -1074,7 +1024,7 @@ pub(super) fn explain( || p4.starts_with("ntile(") { // count(_) -> INTEGER - state.r.insert( + state.mem.r.insert( p1, RegDataType::Single(ColumnType::Single { datatype: DataType::Int64, @@ -1086,7 +1036,7 @@ pub(super) fn explain( OP_CAST => { // affinity(r[p1]) - if let Some(v) = state.r.get_mut(&p1) { + if let Some(v) = state.mem.r.get_mut(&p1) { *v = RegDataType::Single(ColumnType::Single { datatype: affinity_to_type(p2 as u8), nullable: v.map_to_nullable(), @@ -1096,8 +1046,8 @@ pub(super) fn explain( OP_SCOPY | OP_INT_COPY => { // r[p2] = r[p1] - if let Some(v) = state.r.get(&p1).cloned() { - state.r.insert(p2, v); + if let Some(v) = state.mem.r.get(&p1).cloned() { + state.mem.r.insert(p2, v); } } @@ -1107,8 +1057,8 @@ pub(super) fn explain( for i in 0..=p3 { let src = p1 + i; let dst = p2 + i; - if let Some(v) = state.r.get(&src).cloned() { - state.r.insert(dst, v); + if let Some(v) = state.mem.r.get(&src).cloned() { + state.mem.r.insert(dst, v); } } } @@ -1120,9 +1070,12 @@ pub(super) fn explain( for i in 0..p3 { let src = p1 + i; let dst = p2 + i; - if let Some(v) = state.r.get(&src).cloned() { - state.r.insert(dst, v); - state.r.insert(src, RegDataType::Single(ColumnType::null())); + if let Some(v) = state.mem.r.get(&src).cloned() { + state.mem.r.insert(dst, v); + state + .mem + .r + .insert(src, RegDataType::Single(ColumnType::null())); } } } @@ -1130,12 +1083,12 @@ pub(super) fn explain( OP_INTEGER => { // r[p2] = p1 - state.r.insert(p2, RegDataType::Int(p1)); + state.mem.r.insert(p2, RegDataType::Int(p1)); } OP_BLOB | OP_COUNT | OP_REAL | OP_STRING8 | OP_ROWID | OP_NEWROWID => { // r[p2] = - state.r.insert( + state.mem.r.insert( p2, RegDataType::Single(ColumnType::Single { datatype: opcode_to_type(&opcode), @@ -1146,8 +1099,8 @@ pub(super) fn explain( OP_NOT => { // r[p2] = NOT r[p1] - if let Some(a) = state.r.get(&p1).cloned() { - state.r.insert(p2, a); + if let Some(a) = state.mem.r.get(&p1).cloned() { + state.mem.r.insert(p2, a); } } @@ -1156,16 +1109,19 @@ pub(super) fn explain( let idx_range = if p2 < p3 { p2..=p3 } else { p2..=p2 }; for idx in idx_range { - state.r.insert(idx, RegDataType::Single(ColumnType::null())); + state + .mem + .r + .insert(idx, RegDataType::Single(ColumnType::null())); } } OP_OR | OP_AND | OP_BIT_AND | OP_BIT_OR | OP_SHIFT_LEFT | OP_SHIFT_RIGHT | OP_ADD | OP_SUBTRACT | OP_MULTIPLY | OP_DIVIDE | OP_REMAINDER | OP_CONCAT => { // r[p3] = r[p1] + r[p2] - match (state.r.get(&p1).cloned(), state.r.get(&p2).cloned()) { + match (state.mem.r.get(&p1).cloned(), state.mem.r.get(&p2).cloned()) { (Some(a), Some(b)) => { - state.r.insert( + state.mem.r.insert( p3, RegDataType::Single(ColumnType::Single { datatype: if matches!(a.map_to_datatype(), DataType::Null) { @@ -1184,7 +1140,7 @@ pub(super) fn explain( } (Some(v), None) => { - state.r.insert( + state.mem.r.insert( p3, RegDataType::Single(ColumnType::Single { datatype: v.map_to_datatype(), @@ -1194,7 +1150,7 @@ pub(super) fn explain( } (None, Some(v)) => { - state.r.insert( + state.mem.r.insert( p3, RegDataType::Single(ColumnType::Single { datatype: v.map_to_datatype(), @@ -1209,7 +1165,7 @@ pub(super) fn explain( OP_OFFSET_LIMIT => { // r[p2] = if r[p2] < 0 { r[p1] } else if r[p1]<0 { -1 } else { r[p1] + r[p3] } - state.r.insert( + state.mem.r.insert( p2, RegDataType::Single(ColumnType::Single { datatype: DataType::Int64, @@ -1224,7 +1180,7 @@ pub(super) fn explain( state.result = Some( (p1..p1 + p2) .map(|i| { - let coltype = state.r.get(&i); + let coltype = state.mem.r.get(&i); let sqltype = coltype.map(|d| d.map_to_datatype()).map(SqliteTypeInfo); @@ -1257,11 +1213,11 @@ pub(super) fn explain( _ => { // ignore unsupported operations // if we fail to find an r later, we just give up - logger.add_unknown_operation(&program[state.program_i]); + logger.add_unknown_operation(&program[state.mem.program_i]); } } - state.program_i += 1; + state.mem.program_i += 1; } } diff --git a/sqlx-sqlite/src/connection/intmap.rs b/sqlx-sqlite/src/connection/intmap.rs new file mode 100644 index 00000000..3bf4f886 --- /dev/null +++ b/sqlx-sqlite/src/connection/intmap.rs @@ -0,0 +1,115 @@ +/// Simplistic map implementation built on a Vec of Options (index = key) +#[derive(Debug, Clone, Eq, Default)] +pub(crate) struct IntMap( + Vec>, +); + +impl IntMap { + pub(crate) fn new() -> Self { + Self(Vec::new()) + } + + pub(crate) fn expand(&mut self, size: i64) -> usize { + let idx = size.try_into().expect("negative column index unsupported"); + while self.0.len() <= idx { + self.0.push(None); + } + idx + } + + pub(crate) fn from_dense_record(record: &Vec) -> Self { + Self(record.iter().cloned().map(Some).collect()) + } + + pub(crate) fn values_mut(&mut self) -> impl Iterator { + self.0.iter_mut().filter_map(Option::as_mut) + } + + pub(crate) fn values(&self) -> impl Iterator { + self.0.iter().filter_map(Option::as_ref) + } + + pub(crate) fn get(&self, idx: &i64) -> Option<&V> { + let idx: usize = (*idx) + .try_into() + .expect("negative column index unsupported"); + + match self.0.get(idx) { + Some(Some(v)) => Some(v), + _ => None, + } + } + + pub(crate) fn get_mut(&mut self, idx: &i64) -> Option<&mut V> { + let idx: usize = (*idx) + .try_into() + .expect("negative column index unsupported"); + match self.0.get_mut(idx) { + Some(Some(v)) => Some(v), + _ => None, + } + } + + pub(crate) fn insert(&mut self, idx: i64, value: V) -> Option { + let idx: usize = self.expand(idx); + + std::mem::replace(&mut self.0[idx], Some(value)) + } + + pub(crate) fn remove(&mut self, idx: &i64) -> Option { + let idx: usize = (*idx) + .try_into() + .expect("negative column index unsupported"); + + let item = self.0.get_mut(idx); + match item { + Some(content) => std::mem::replace(content, None), + None => None, + } + } +} + +impl std::hash::Hash for IntMap { + fn hash(&self, state: &mut H) { + for value in self.values() { + value.hash(state); + } + } +} + +impl PartialEq for IntMap { + fn eq(&self, other: &Self) -> bool { + if !self + .0 + .iter() + .zip(other.0.iter()) + .all(|(l, r)| PartialEq::eq(l, r)) + { + return false; + } + + if self.0.len() > other.0.len() { + self.0[other.0.len()..].iter().all(Option::is_none) + } else if self.0.len() < other.0.len() { + other.0[self.0.len()..].iter().all(Option::is_none) + } else { + true + } + } +} + +impl FromIterator<(i64, V)> + for IntMap +{ + fn from_iter(iter: I) -> Self + where + I: IntoIterator, + { + let mut result = Self(Vec::new()); + for (idx, val) in iter { + let idx = result.expand(idx); + result.0[idx] = Some(val); + } + result + } +} diff --git a/sqlx-sqlite/src/connection/mod.rs b/sqlx-sqlite/src/connection/mod.rs index 69879768..f17c6102 100644 --- a/sqlx-sqlite/src/connection/mod.rs +++ b/sqlx-sqlite/src/connection/mod.rs @@ -30,6 +30,7 @@ pub(crate) mod execute; mod executor; mod explain; mod handle; +mod intmap; mod worker;