From 739397a90704377159e52581fd5e631cc3449dd8 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 3 Apr 2026 08:05:40 +0530 Subject: [PATCH 01/74] dst --- Cargo.lock | 14 ++ Cargo.toml | 1 + .../src/locking_tx_datastore/datastore.rs | 26 ++++ .../src/locking_tx_datastore/lock_trace.rs | 57 +++++++ .../datastore/src/locking_tx_datastore/mod.rs | 2 + crates/dst/Cargo.toml | 19 +++ crates/dst/src/datastore.rs | 141 ++++++++++++++++++ crates/dst/src/lib.rs | 6 + crates/dst/src/runner.rs | 9 ++ crates/dst/src/scheduler.rs | 128 ++++++++++++++++ crates/dst/src/seed.rs | 62 ++++++++ crates/dst/src/sync.rs | 101 +++++++++++++ crates/dst/src/trace.rs | 24 +++ 13 files changed, 590 insertions(+) create mode 100644 crates/datastore/src/locking_tx_datastore/lock_trace.rs create mode 100644 crates/dst/Cargo.toml create mode 100644 crates/dst/src/datastore.rs create mode 100644 crates/dst/src/lib.rs create mode 100644 crates/dst/src/runner.rs create mode 100644 crates/dst/src/scheduler.rs create mode 100644 crates/dst/src/seed.rs create mode 100644 crates/dst/src/sync.rs create mode 100644 crates/dst/src/trace.rs diff --git a/Cargo.lock b/Cargo.lock index 03618187864..b7d88bc42dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8131,6 +8131,20 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "spacetimedb-dst" +version = "2.1.0" +dependencies = [ + "anyhow", + "pretty_assertions", + "spacetimedb-datastore", + "spacetimedb-lib 2.1.0", + "spacetimedb-primitives 2.1.0", + "spacetimedb-sats 2.1.0", + "spacetimedb-schema", + "spacetimedb-table", +] + [[package]] name = "spacetimedb-durability" version = "2.1.0" diff --git a/Cargo.toml b/Cargo.toml index 49c1fa355da..454352029a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "crates/commitlog", "crates/core", "crates/data-structures", + "crates/dst", "crates/datastore", "crates/durability", "crates/execution", diff --git a/crates/datastore/src/locking_tx_datastore/datastore.rs 
b/crates/datastore/src/locking_tx_datastore/datastore.rs index 53327b55c8b..2c55ab08f7c 100644 --- a/crates/datastore/src/locking_tx_datastore/datastore.rs +++ b/crates/datastore/src/locking_tx_datastore/datastore.rs @@ -1,3 +1,5 @@ +#[cfg(any(test, feature = "test"))] +use super::lock_trace::{self, LockEvent, LockEventKind}; use super::{ committed_state::CommittedState, mut_tx::MutTxId, sequence::SequencesState, state_view::StateView, tx::TxId, tx_state::TxState, @@ -374,7 +376,15 @@ impl Tx for Locking { let ctx = ExecutionContext::with_workload(self.database_identity, workload); let timer = Instant::now(); + #[cfg(any(test, feature = "test"))] + lock_trace::emit(LockEvent { + kind: LockEventKind::BeginReadRequested, + }); let committed_state_shared_lock = self.committed_state.read_arc(); + #[cfg(any(test, feature = "test"))] + lock_trace::emit(LockEvent { + kind: LockEventKind::BeginReadAcquired, + }); let lock_wait_time = timer.elapsed(); Self::Tx { @@ -930,8 +940,24 @@ impl MutTx for Locking { let ctx = ExecutionContext::with_workload(self.database_identity, workload); let timer = Instant::now(); + #[cfg(any(test, feature = "test"))] + lock_trace::emit(LockEvent { + kind: LockEventKind::BeginWriteRequested, + }); let committed_state_write_lock = self.committed_state.write_arc(); + #[cfg(any(test, feature = "test"))] + lock_trace::emit(LockEvent { + kind: LockEventKind::BeginWriteAcquired, + }); + #[cfg(any(test, feature = "test"))] + lock_trace::emit(LockEvent { + kind: LockEventKind::SequenceMutexRequested, + }); let sequence_state_lock = self.sequence_state.lock_arc(); + #[cfg(any(test, feature = "test"))] + lock_trace::emit(LockEvent { + kind: LockEventKind::SequenceMutexAcquired, + }); let lock_wait_time = timer.elapsed(); MutTxId { diff --git a/crates/datastore/src/locking_tx_datastore/lock_trace.rs b/crates/datastore/src/locking_tx_datastore/lock_trace.rs new file mode 100644 index 00000000000..38584aa7395 --- /dev/null +++ 
b/crates/datastore/src/locking_tx_datastore/lock_trace.rs @@ -0,0 +1,57 @@ +#[cfg(any(test, feature = "test"))] +use std::sync::{Arc, Mutex, OnceLock}; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum LockEventKind { + BeginReadRequested, + BeginReadAcquired, + BeginWriteRequested, + BeginWriteAcquired, + SequenceMutexRequested, + SequenceMutexAcquired, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct LockEvent { + pub kind: LockEventKind, +} + +#[cfg(any(test, feature = "test"))] +type Hook = Arc; + +#[cfg(any(test, feature = "test"))] +fn hook_cell() -> &'static Mutex> { + static CELL: OnceLock>> = OnceLock::new(); + CELL.get_or_init(|| Mutex::new(None)) +} + +#[cfg(any(test, feature = "test"))] +pub struct HookGuard; + +#[cfg(any(test, feature = "test"))] +impl Drop for HookGuard { + fn drop(&mut self) { + *hook_cell().lock().expect("lock hook cell") = None; + } +} + +#[cfg(any(test, feature = "test"))] +pub fn install_lock_event_hook(hook: impl Fn(LockEvent) + Send + Sync + 'static) -> HookGuard { + *hook_cell().lock().expect("lock hook cell") = Some(Arc::new(hook)); + HookGuard +} + +#[cfg(not(any(test, feature = "test")))] +pub struct HookGuard; + +#[cfg(not(any(test, feature = "test")))] +pub fn install_lock_event_hook(_hook: impl Fn(LockEvent) + Send + Sync + 'static) -> HookGuard { + HookGuard +} + +pub(super) fn emit(event: LockEvent) { + #[cfg(any(test, feature = "test"))] + if let Some(hook) = hook_cell().lock().expect("lock hook cell").clone() { + hook(event); + } +} diff --git a/crates/datastore/src/locking_tx_datastore/mod.rs b/crates/datastore/src/locking_tx_datastore/mod.rs index 8eb2ea93bc1..6a71118a242 100644 --- a/crates/datastore/src/locking_tx_datastore/mod.rs +++ b/crates/datastore/src/locking_tx_datastore/mod.rs @@ -2,6 +2,8 @@ pub mod committed_state; pub mod datastore; +#[cfg(any(test, feature = "test"))] +pub mod lock_trace; mod mut_tx; pub use mut_tx::{FuncCallType, IndexScanPointOrRange, MutTxId, ViewCallInfo}; 
mod sequence; diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml new file mode 100644 index 00000000000..3566c21ae3b --- /dev/null +++ b/crates/dst/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "spacetimedb-dst" +version.workspace = true +edition.workspace = true +license-file = "LICENSE" +description = "Deterministic simulation testing utilities for SpacetimeDB crates" +rust-version.workspace = true + +[dependencies] +anyhow.workspace = true +spacetimedb-datastore = { workspace = true, features = ["test"] } +spacetimedb-lib.workspace = true +spacetimedb-primitives.workspace = true +spacetimedb-sats.workspace = true +spacetimedb-schema = { workspace = true, features = ["test"] } +spacetimedb-table.workspace = true + +[dev-dependencies] +pretty_assertions.workspace = true diff --git a/crates/dst/src/datastore.rs b/crates/dst/src/datastore.rs new file mode 100644 index 00000000000..70b73a9e1b5 --- /dev/null +++ b/crates/dst/src/datastore.rs @@ -0,0 +1,141 @@ +use spacetimedb_datastore::{ + locking_tx_datastore::{ + datastore::Locking, + lock_trace::{install_lock_event_hook, LockEvent}, + }, + traits::{IsolationLevel, MutTx, MutTxDatastore}, +}; +use spacetimedb_lib::{ + db::auth::{StAccess, StTableType}, + Identity, +}; +use spacetimedb_primitives::TableId; +use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; +use spacetimedb_schema::{ + schema::{ColumnSchema, TableSchema}, + table_name::TableName, +}; +use spacetimedb_table::page_pool::PagePool; + +pub fn bootstrap_datastore() -> spacetimedb_datastore::Result { + Locking::bootstrap(Identity::ZERO, PagePool::new_for_test()) +} + +pub fn basic_table_schema(name: &str) -> TableSchema { + TableSchema::new( + TableId::SENTINEL, + TableName::for_test(name), + None, + vec![ + ColumnSchema::for_test(0, "id", AlgebraicType::U64), + ColumnSchema::for_test(1, "name", AlgebraicType::String), + ], + vec![], + vec![], + vec![], + StTableType::User, + StAccess::Public, + None, + None, + false, + None, + ) 
+} + +pub fn create_table(datastore: &Locking, schema: TableSchema) -> spacetimedb_datastore::Result { + let mut tx = datastore.begin_mut_tx( + IsolationLevel::Serializable, + spacetimedb_datastore::execution_context::Workload::ForTests, + ); + let table_id = datastore.create_table_mut_tx(&mut tx, schema)?; + datastore.commit_mut_tx(tx)?; + Ok(table_id) +} + +pub fn insert_row(datastore: &Locking, table_id: TableId, id: u64, name: &str) -> spacetimedb_datastore::Result<()> { + let row = ProductValue::from_iter([AlgebraicValue::U64(id), AlgebraicValue::String(name.into())]); + let bytes = spacetimedb_sats::bsatn::to_vec(&row).map_err(anyhow::Error::from)?; + let mut tx = datastore.begin_mut_tx( + IsolationLevel::Serializable, + spacetimedb_datastore::execution_context::Workload::ForTests, + ); + datastore.insert_mut_tx(&mut tx, table_id, &bytes)?; + datastore.commit_mut_tx(tx)?; + Ok(()) +} + +pub fn observe_lock_events(hook: F, body: impl FnOnce() -> R) -> R +where + F: Fn(LockEvent) + Send + Sync + 'static, +{ + let _guard = install_lock_event_hook(hook); + body() +} + +#[cfg(test)] +mod tests { + use std::{sync::mpsc, thread}; + + use pretty_assertions::assert_eq; + use spacetimedb_datastore::{ + execution_context::Workload, + locking_tx_datastore::lock_trace::{LockEvent, LockEventKind}, + traits::{IsolationLevel, MutTx, Tx}, + }; + + use super::{bootstrap_datastore, observe_lock_events}; + + #[test] + fn datastore_writer_waits_for_reader() { + let datastore = bootstrap_datastore().expect("bootstrap datastore"); + let (tx, rx) = mpsc::channel::(); + + observe_lock_events( + move |event| { + tx.send(event).expect("send lock event"); + }, + || { + let read_tx = datastore.begin_tx(Workload::ForTests); + let datastore_for_writer = datastore.clone(); + + let writer = thread::spawn(move || { + let write_tx = datastore_for_writer.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + let _ = datastore_for_writer.rollback_mut_tx(write_tx); + }); + + let mut 
events: Vec = Vec::new(); + while !events + .iter() + .any(|event| event.kind == LockEventKind::BeginWriteRequested) + { + events.push(rx.recv().expect("receive requested event")); + } + + assert_eq!( + events.last().map(|event| event.kind), + Some(LockEventKind::BeginWriteRequested) + ); + assert!( + !events + .iter() + .any(|event| event.kind == LockEventKind::BeginWriteAcquired), + "writer should not acquire while a reader is held" + ); + + drop(read_tx); + events.push(rx.recv().expect("receive acquired event")); + writer.join().expect("writer join"); + + assert_eq!( + events.iter().map(|event| event.kind).collect::>(), + vec![ + LockEventKind::BeginReadRequested, + LockEventKind::BeginReadAcquired, + LockEventKind::BeginWriteRequested, + LockEventKind::BeginWriteAcquired, + ] + ); + }, + ); + } +} diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs new file mode 100644 index 00000000000..ee29aae07c2 --- /dev/null +++ b/crates/dst/src/lib.rs @@ -0,0 +1,6 @@ +pub mod datastore; +pub mod runner; +pub mod scheduler; +pub mod seed; +pub mod sync; +pub mod trace; diff --git a/crates/dst/src/runner.rs b/crates/dst/src/runner.rs new file mode 100644 index 00000000000..cba3010aaa6 --- /dev/null +++ b/crates/dst/src/runner.rs @@ -0,0 +1,9 @@ +use crate::{ + scheduler::{Actor, ScheduleMode, Scheduler}, + seed::DstSeed, + trace::Trace, +}; + +pub fn run_seeded(actors: Vec, seed: DstSeed) -> Trace { + Scheduler::new(actors, ScheduleMode::Seeded, Some(seed.rng())).run_to_completion() +} diff --git a/crates/dst/src/scheduler.rs b/crates/dst/src/scheduler.rs new file mode 100644 index 00000000000..6b5e6e51f8b --- /dev/null +++ b/crates/dst/src/scheduler.rs @@ -0,0 +1,128 @@ +use crate::{seed::DstRng, trace::Trace}; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum StepState { + Progressed, + Blocked, + Complete, +} + +pub trait Actor { + type Event: Clone; + + fn step(&mut self, trace: &mut Trace) -> StepState; + fn is_complete(&self) -> bool; +} + 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum ScheduleMode { + RoundRobin, + Seeded, +} + +pub struct Scheduler { + actors: Vec, + cursor: usize, + rng: Option, + trace: Trace, +} + +impl Scheduler { + pub fn new(actors: Vec, mode: ScheduleMode, rng: Option) -> Self { + let rng = match mode { + ScheduleMode::RoundRobin => None, + ScheduleMode::Seeded => Some(rng.expect("seeded mode requires rng")), + }; + Self { + actors, + cursor: 0, + rng, + trace: Trace::default(), + } + } + + pub fn run_to_completion(mut self) -> Trace { + while self.step_once() {} + self.trace + } + + pub fn step_once(&mut self) -> bool { + let runnable = self.runnable_indices(); + if runnable.is_empty() { + return false; + } + + let pick = if let Some(rng) = &mut self.rng { + runnable[rng.index(runnable.len())] + } else { + let pick = runnable[self.cursor % runnable.len()]; + self.cursor = self.cursor.wrapping_add(1); + pick + }; + + !matches!(self.actors[pick].step(&mut self.trace), StepState::Complete) + || self.actors.iter().any(|actor| !actor.is_complete()) + } + + fn runnable_indices(&self) -> Vec { + self.actors + .iter() + .enumerate() + .filter_map(|(idx, actor)| (!actor.is_complete()).then_some(idx)) + .collect() + } +} + +#[cfg(test)] +mod tests { + use crate::trace::Trace; + + use super::{Actor, ScheduleMode, Scheduler, StepState}; + + #[derive(Clone)] + struct CounterActor { + label: &'static str, + remaining: usize, + } + + impl Actor for CounterActor { + type Event = &'static str; + + fn step(&mut self, trace: &mut Trace) -> StepState { + if self.remaining == 0 { + return StepState::Complete; + } + trace.push(self.label); + self.remaining -= 1; + if self.remaining == 0 { + StepState::Complete + } else { + StepState::Progressed + } + } + + fn is_complete(&self) -> bool { + self.remaining == 0 + } + } + + #[test] + fn round_robin_scheduler_is_stable() { + let trace = Scheduler::new( + vec![ + CounterActor { + label: "a", + remaining: 2, + }, + CounterActor { + label: 
"b", + remaining: 2, + }, + ], + ScheduleMode::RoundRobin, + None, + ) + .run_to_completion(); + assert_eq!(trace.as_slice(), &["a", "b", "a", "b"]); + } +} diff --git a/crates/dst/src/seed.rs b/crates/dst/src/seed.rs new file mode 100644 index 00000000000..6ddf40b8734 --- /dev/null +++ b/crates/dst/src/seed.rs @@ -0,0 +1,62 @@ +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +pub struct DstSeed(pub u64); + +impl DstSeed { + pub fn fork(self, discriminator: u64) -> Self { + Self(splitmix64(self.0 ^ discriminator.wrapping_mul(0x9e37_79b9_7f4a_7c15))) + } + + pub fn rng(self) -> DstRng { + DstRng { + state: splitmix64(self.0), + } + } +} + +#[derive(Clone, Debug)] +pub struct DstRng { + state: u64, +} + +impl DstRng { + pub fn next_u64(&mut self) -> u64 { + self.state = self.state.wrapping_add(0x9e37_79b9_7f4a_7c15); + let mut z = self.state; + z = (z ^ (z >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + z = (z ^ (z >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + z ^ (z >> 31) + } + + pub fn index(&mut self, len: usize) -> usize { + assert!(len > 0, "len must be non-zero"); + (self.next_u64() as usize) % len + } +} + +fn splitmix64(mut x: u64) -> u64 { + x = x.wrapping_add(0x9e37_79b9_7f4a_7c15); + x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + x ^ (x >> 31) +} + +#[cfg(test)] +mod tests { + use super::DstSeed; + + #[test] + fn fork_is_stable_and_distinct() { + let seed = DstSeed(7); + assert_eq!(seed.fork(1), seed.fork(1)); + assert_ne!(seed.fork(1), seed.fork(2)); + } + + #[test] + fn rng_sequence_is_replayable() { + let mut a = DstSeed(99).rng(); + let mut b = DstSeed(99).rng(); + for _ in 0..8 { + assert_eq!(a.next_u64(), b.next_u64()); + } + } +} diff --git a/crates/dst/src/sync.rs b/crates/dst/src/sync.rs new file mode 100644 index 00000000000..d8f63ee2cea --- /dev/null +++ b/crates/dst/src/sync.rs @@ -0,0 +1,101 @@ +use std::collections::VecDeque; + +#[derive(Clone, Copy, Debug, Eq, 
PartialEq)] +pub enum LockEventKind { + ReadRequested, + WriteRequested, + ReadGranted, + WriteGranted, + ReadReleased, + WriteReleased, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct LockEvent { + pub actor_id: usize, + pub kind: LockEventKind, +} + +#[derive(Clone, Debug, Default)] +pub struct SimRwLock { + readers: usize, + writer: Option, + waiters: VecDeque<(usize, LockEventKind)>, +} + +impl SimRwLock { + pub fn request_read(&mut self, actor_id: usize) -> LockEvent { + self.waiters.push_back((actor_id, LockEventKind::ReadRequested)); + LockEvent { + actor_id, + kind: LockEventKind::ReadRequested, + } + } + + pub fn request_write(&mut self, actor_id: usize) -> LockEvent { + self.waiters.push_back((actor_id, LockEventKind::WriteRequested)); + LockEvent { + actor_id, + kind: LockEventKind::WriteRequested, + } + } + + pub fn grant_next(&mut self) -> Option { + let &(actor_id, kind) = self.waiters.front()?; + match kind { + LockEventKind::ReadRequested if self.writer.is_none() => { + self.waiters.pop_front(); + self.readers += 1; + Some(LockEvent { + actor_id, + kind: LockEventKind::ReadGranted, + }) + } + LockEventKind::WriteRequested if self.writer.is_none() && self.readers == 0 => { + self.waiters.pop_front(); + self.writer = Some(actor_id); + Some(LockEvent { + actor_id, + kind: LockEventKind::WriteGranted, + }) + } + _ => None, + } + } + + pub fn release_read(&mut self, actor_id: usize) -> LockEvent { + assert!(self.readers > 0, "no reader to release"); + self.readers -= 1; + LockEvent { + actor_id, + kind: LockEventKind::ReadReleased, + } + } + + pub fn release_write(&mut self, actor_id: usize) -> LockEvent { + assert_eq!(self.writer, Some(actor_id), "actor does not own write lock"); + self.writer = None; + LockEvent { + actor_id, + kind: LockEventKind::WriteReleased, + } + } +} + +#[cfg(test)] +mod tests { + use super::{LockEventKind, SimRwLock}; + + #[test] + fn writer_waits_for_reader() { + let mut lock = SimRwLock::default(); + 
lock.request_read(1); + assert_eq!(lock.grant_next().unwrap().kind, LockEventKind::ReadGranted); + + lock.request_write(2); + assert!(lock.grant_next().is_none()); + + lock.release_read(1); + assert_eq!(lock.grant_next().unwrap().kind, LockEventKind::WriteGranted); + } +} diff --git a/crates/dst/src/trace.rs b/crates/dst/src/trace.rs new file mode 100644 index 00000000000..c5a2b37a8f8 --- /dev/null +++ b/crates/dst/src/trace.rs @@ -0,0 +1,24 @@ +#[derive(Clone, Debug)] +pub struct Trace { + events: Vec, +} + +impl Trace { + pub fn push(&mut self, event: E) { + self.events.push(event); + } + + pub fn as_slice(&self) -> &[E] { + &self.events + } + + pub fn into_events(self) -> Vec { + self.events + } +} + +impl Default for Trace { + fn default() -> Self { + Self { events: Vec::new() } + } +} From 1ae1d5106d7242fd7565642e4904702802fb2f39 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 3 Apr 2026 09:47:54 +0530 Subject: [PATCH 02/74] dst --- Cargo.lock | 2 + crates/dst/Cargo.toml | 2 + crates/dst/src/datastore.rs | 425 +++++++++++++++++++++++++++++++----- crates/dst/src/lib.rs | 1 + crates/dst/src/runner.rs | 38 ++++ crates/dst/src/scheduler.rs | 5 +- crates/dst/src/subsystem.rs | 44 ++++ crates/dst/src/trace.rs | 60 ++++- 8 files changed, 520 insertions(+), 57 deletions(-) create mode 100644 crates/dst/src/subsystem.rs diff --git a/Cargo.lock b/Cargo.lock index b7d88bc42dd..4723196d3d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8137,7 +8137,9 @@ version = "2.1.0" dependencies = [ "anyhow", "pretty_assertions", + "proptest", "spacetimedb-datastore", + "spacetimedb-execution", "spacetimedb-lib 2.1.0", "spacetimedb-primitives 2.1.0", "spacetimedb-sats 2.1.0", diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index 3566c21ae3b..c8dc99eba1a 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -9,6 +9,7 @@ rust-version.workspace = true [dependencies] anyhow.workspace = true spacetimedb-datastore = { workspace = true, features = ["test"] } 
+spacetimedb-execution.workspace = true spacetimedb-lib.workspace = true spacetimedb-primitives.workspace = true spacetimedb-sats.workspace = true @@ -17,3 +18,4 @@ spacetimedb-table.workspace = true [dev-dependencies] pretty_assertions.workspace = true +proptest.workspace = true diff --git a/crates/dst/src/datastore.rs b/crates/dst/src/datastore.rs index 70b73a9e1b5..f98e176ca67 100644 --- a/crates/dst/src/datastore.rs +++ b/crates/dst/src/datastore.rs @@ -1,10 +1,15 @@ +use std::{sync::mpsc, thread}; + use spacetimedb_datastore::{ + execution_context::Workload, locking_tx_datastore::{ datastore::Locking, - lock_trace::{install_lock_event_hook, LockEvent}, + lock_trace::{install_lock_event_hook, LockEvent, LockEventKind}, + MutTxId, }, - traits::{IsolationLevel, MutTx, MutTxDatastore}, + traits::{IsolationLevel, MutTx, MutTxDatastore, Tx}, }; +use spacetimedb_execution::Datastore as _; use spacetimedb_lib::{ db::auth::{StAccess, StTableType}, Identity, @@ -17,6 +22,12 @@ use spacetimedb_schema::{ }; use spacetimedb_table::page_pool::PagePool; +use crate::{ + seed::{DstRng, DstSeed}, + subsystem::{assert_invariants, DeterminismLevel, DstSubsystem, Invariant, RunRecord}, + trace::Trace, +}; + pub fn bootstrap_datastore() -> spacetimedb_datastore::Result { Locking::bootstrap(Identity::ZERO, PagePool::new_for_test()) } @@ -43,10 +54,7 @@ pub fn basic_table_schema(name: &str) -> TableSchema { } pub fn create_table(datastore: &Locking, schema: TableSchema) -> spacetimedb_datastore::Result { - let mut tx = datastore.begin_mut_tx( - IsolationLevel::Serializable, - spacetimedb_datastore::execution_context::Workload::ForTests, - ); + let mut tx = datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); let table_id = datastore.create_table_mut_tx(&mut tx, schema)?; datastore.commit_mut_tx(tx)?; Ok(table_id) @@ -55,10 +63,7 @@ pub fn create_table(datastore: &Locking, schema: TableSchema) -> spacetimedb_dat pub fn insert_row(datastore: &Locking, table_id: 
TableId, id: u64, name: &str) -> spacetimedb_datastore::Result<()> { let row = ProductValue::from_iter([AlgebraicValue::U64(id), AlgebraicValue::String(name.into())]); let bytes = spacetimedb_sats::bsatn::to_vec(&row).map_err(anyhow::Error::from)?; - let mut tx = datastore.begin_mut_tx( - IsolationLevel::Serializable, - spacetimedb_datastore::execution_context::Workload::ForTests, - ); + let mut tx = datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); datastore.insert_mut_tx(&mut tx, table_id, &bytes)?; datastore.commit_mut_tx(tx)?; Ok(()) @@ -72,30 +77,85 @@ where body() } -#[cfg(test)] -mod tests { - use std::{sync::mpsc, thread}; +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct DatastoreCase { + pub seed: DstSeed, + pub baseline: BaselinePlan, + pub hold_reader_during_writer_start: bool, +} - use pretty_assertions::assert_eq; - use spacetimedb_datastore::{ - execution_context::Workload, - locking_tx_datastore::lock_trace::{LockEvent, LockEventKind}, - traits::{IsolationLevel, MutTx, Tx}, - }; +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct DatastoreOutcome { + pub baseline_row_count: u64, + pub final_row_count: u64, +} - use super::{bootstrap_datastore, observe_lock_events}; +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct BaselinePlan { + pub schema: SchemaPlan, + pub setup: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SchemaPlan { + pub table_name: String, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SetupTxn { + pub ops: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum SetupOp { + Insert { id: u64, name: String }, + DeleteExisting { id: u64, name: String }, +} + +#[derive(Clone, Debug, Default)] +struct BaselineModel { + existing_rows: Vec<(u64, String)>, + next_id: u64, +} + +pub struct DatastoreSubsystem; + +impl DstSubsystem for DatastoreSubsystem { + type Case = DatastoreCase; + type Event = LockEvent; + type Outcome = DatastoreOutcome; + + fn name() -> &'static str { + "datastore" + 
} + + fn determinism_level() -> DeterminismLevel { + DeterminismLevel::TransitionalObserved + } + + fn generate_case(seed: DstSeed) -> Self::Case { + let mut rng = seed.fork(1).rng(); + DatastoreCase { + seed, + baseline: generate_baseline_plan(&mut rng), + hold_reader_during_writer_start: true, + } + } + + fn run_case(case: &Self::Case) -> anyhow::Result> { + let datastore = bootstrap_datastore()?; + let table_id = apply_baseline_plan(&datastore, &case.baseline)?; - #[test] - fn datastore_writer_waits_for_reader() { - let datastore = bootstrap_datastore().expect("bootstrap datastore"); let (tx, rx) = mpsc::channel::(); - observe_lock_events( + let trace_events = observe_lock_events( move |event| { tx.send(event).expect("send lock event"); }, - || { - let read_tx = datastore.begin_tx(Workload::ForTests); + || -> anyhow::Result> { + let read_tx = case + .hold_reader_during_writer_start + .then(|| datastore.begin_tx(Workload::ForTests)); let datastore_for_writer = datastore.clone(); let writer = thread::spawn(move || { @@ -103,39 +163,304 @@ mod tests { let _ = datastore_for_writer.rollback_mut_tx(write_tx); }); - let mut events: Vec = Vec::new(); + let mut events = Vec::new(); while !events .iter() - .any(|event| event.kind == LockEventKind::BeginWriteRequested) + .any(|event: &LockEvent| event.kind == LockEventKind::BeginWriteRequested) { - events.push(rx.recv().expect("receive requested event")); + events.push(rx.recv()?); } - assert_eq!( - events.last().map(|event| event.kind), - Some(LockEventKind::BeginWriteRequested) - ); - assert!( - !events + if let Some(read_tx) = read_tx { + drop(read_tx); + while !events .iter() - .any(|event| event.kind == LockEventKind::BeginWriteAcquired), - "writer should not acquire while a reader is held" - ); + .any(|event: &LockEvent| event.kind == LockEventKind::BeginWriteAcquired) + { + events.push(rx.recv()?); + } + } - drop(read_tx); - events.push(rx.recv().expect("receive acquired event")); writer.join().expect("writer 
join"); + Ok(events) + }, + )?; + + let baseline_row_count = datastore.begin_tx(Workload::ForTests).row_count(table_id); + let final_row_count = datastore.begin_tx(Workload::ForTests).row_count(table_id); - assert_eq!( - events.iter().map(|event| event.kind).collect::>(), - vec![ - LockEventKind::BeginReadRequested, - LockEventKind::BeginReadAcquired, - LockEventKind::BeginWriteRequested, - LockEventKind::BeginWriteAcquired, - ] - ); + let artifact = RunRecord { + subsystem: Self::name(), + determinism_level: Self::determinism_level(), + seed: case.seed, + case: case.clone(), + trace: Some(Trace::from_events(trace_events)), + outcome: DatastoreOutcome { + baseline_row_count, + final_row_count, }, - ); + }; + + assert_invariants( + &artifact, + &[ + &BaselineMatchesPlan, + &WriterWaitsForReader, + &RollbackPreservesBaseline, + &ReplayableOutcome, + ], + )?; + + Ok(artifact) + } +} + +fn generate_baseline_plan(rng: &mut DstRng) -> BaselinePlan { + let mut model = BaselineModel { + existing_rows: Vec::new(), + next_id: 1, + }; + let tx_count = rng.index(5) + 1; + let mut txns = Vec::with_capacity(tx_count); + + for _ in 0..tx_count { + let op_count = rng.index(3) + 1; + let mut ops = Vec::with_capacity(op_count); + + for _ in 0..op_count { + let op = choose_setup_op(rng, &mut model); + apply_op_to_model(&mut model, &op); + ops.push(op); + } + + txns.push(SetupTxn { ops }); + } + + BaselinePlan { + schema: SchemaPlan { + table_name: format!("dst_case_{}", rng.next_u64() % 10_000), + }, + setup: txns, + } +} + +fn choose_setup_op(rng: &mut DstRng, model: &mut BaselineModel) -> SetupOp { + let can_delete = !model.existing_rows.is_empty(); + let choose_insert = !can_delete || rng.index(100) < 70; + + if choose_insert { + let id = model.next_id; + SetupOp::Insert { + id, + name: format!("row_{}", rng.next_u64() % 1000), + } + } else { + let idx = rng.index(model.existing_rows.len()); + let (id, name) = &model.existing_rows[idx]; + SetupOp::DeleteExisting { + id: *id, + 
name: name.clone(), + } + } +} + +fn apply_op_to_model(model: &mut BaselineModel, op: &SetupOp) { + match op { + SetupOp::Insert { id, name } => { + model.existing_rows.push((*id, name.clone())); + model.next_id = model.next_id.max(id + 1); + } + SetupOp::DeleteExisting { id, .. } => { + if let Some(pos) = model + .existing_rows + .iter() + .position(|(existing_id, _)| existing_id == id) + { + model.existing_rows.remove(pos); + } + } + } +} + +fn apply_baseline_plan(datastore: &Locking, plan: &BaselinePlan) -> anyhow::Result { + let table_id = create_table(datastore, basic_table_schema(&plan.schema.table_name))?; + + for txn in &plan.setup { + let mut tx = datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + for op in &txn.ops { + apply_setup_op(datastore, &mut tx, table_id, op)?; + } + datastore.commit_mut_tx(tx)?; + } + + Ok(table_id) +} + +fn apply_setup_op(datastore: &Locking, tx: &mut MutTxId, table_id: TableId, op: &SetupOp) -> anyhow::Result<()> { + match op { + SetupOp::Insert { id, name } => { + let row = ProductValue::from_iter([AlgebraicValue::U64(*id), AlgebraicValue::String(name.clone().into())]); + let bytes = spacetimedb_sats::bsatn::to_vec(&row)?; + datastore.insert_mut_tx(tx, table_id, &bytes)?; + } + SetupOp::DeleteExisting { id, name } => { + let row = ProductValue::from_iter([AlgebraicValue::U64(*id), AlgebraicValue::String(name.clone().into())]); + let _ = datastore.delete_by_rel_mut_tx(tx, table_id, [row]); + } + } + Ok(()) +} + +struct WriterWaitsForReader; + +impl Invariant> for WriterWaitsForReader { + fn name(&self) -> &'static str { + "writer-waits-for-reader" + } + + fn check(&self, run: &RunRecord) -> anyhow::Result<()> { + if !run.case.hold_reader_during_writer_start { + return Ok(()); + } + + let trace = run + .trace + .as_ref() + .ok_or_else(|| anyhow::anyhow!("missing diagnostic trace"))?; + let write_requested = trace + .as_slice() + .iter() + .position(|event| event.event.kind == 
LockEventKind::BeginWriteRequested) + .ok_or_else(|| anyhow::anyhow!("missing write request event"))?; + let write_acquired = trace + .as_slice() + .iter() + .position(|event| event.event.kind == LockEventKind::BeginWriteAcquired) + .ok_or_else(|| anyhow::anyhow!("missing write acquired event"))?; + + if write_acquired <= write_requested { + anyhow::bail!("writer acquired before request ordering was established"); + } + Ok(()) + } +} + +struct RollbackPreservesBaseline; + +impl Invariant> for RollbackPreservesBaseline { + fn name(&self) -> &'static str { + "rollback-preserves-baseline" + } + + fn check(&self, run: &RunRecord) -> anyhow::Result<()> { + if run.outcome.baseline_row_count != run.outcome.final_row_count { + anyhow::bail!( + "rollback changed row count: baseline={} final={}", + run.outcome.baseline_row_count, + run.outcome.final_row_count + ); + } + Ok(()) + } +} + +struct ReplayableOutcome; + +impl Invariant> for ReplayableOutcome { + fn name(&self) -> &'static str { + "trace-has-events" + } + + fn check(&self, run: &RunRecord) -> anyhow::Result<()> { + if run.trace.as_ref().is_none_or(|trace| trace.as_slice().is_empty()) { + anyhow::bail!("trace is empty"); + } + Ok(()) + } +} + +struct BaselineMatchesPlan; + +impl Invariant> for BaselineMatchesPlan { + fn name(&self) -> &'static str { + "baseline-matches-plan" + } + + fn check(&self, run: &RunRecord) -> anyhow::Result<()> { + let expected = expected_baseline_rows(&run.case.baseline).len() as u64; + if run.outcome.baseline_row_count != expected { + anyhow::bail!( + "baseline row count mismatch: expected={} actual={}", + expected, + run.outcome.baseline_row_count + ); + } + Ok(()) + } +} + +fn expected_baseline_rows(plan: &BaselinePlan) -> Vec<(u64, String)> { + let mut model = BaselineModel::default(); + for txn in &plan.setup { + for op in &txn.ops { + apply_op_to_model(&mut model, op); + } + } + model.existing_rows +} + +#[cfg(test)] +mod tests { + use std::sync::{Mutex, OnceLock}; + + use 
pretty_assertions::assert_eq; + use proptest::prelude::*; + + use crate::{ + runner::{rerun_case, run_generated, verify_repeatable_execution}, + seed::DstSeed, + }; + + use super::DatastoreSubsystem; + + fn test_lock() -> &'static Mutex<()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) + } + + #[test] + fn datastore_writer_waits_for_reader() { + let _guard = test_lock().lock().expect("lock datastore dst tests"); + let artifact = run_generated::(DstSeed(1)).expect("run datastore dst case"); + assert_eq!(artifact.outcome.baseline_row_count, artifact.outcome.final_row_count); + } + + #[test] + fn rerun_reproduces_case_trace_and_outcome() { + let _guard = test_lock().lock().expect("lock datastore dst tests"); + let artifact = run_generated::(DstSeed(9)).expect("run datastore dst case"); + let replayed = rerun_case::(&artifact).expect("rerun datastore dst case"); + assert_eq!(artifact.case, replayed.case); + assert_eq!(artifact.trace, replayed.trace); + assert_eq!(artifact.outcome, replayed.outcome); + } + + #[test] + fn observed_trace_verifies_repeatable_execution() { + let _guard = test_lock().lock().expect("lock datastore dst tests"); + let artifact = run_generated::(DstSeed(11)).expect("run datastore dst case"); + let replayed = + verify_repeatable_execution::(&artifact).expect("verify repeatable execution"); + assert_eq!(artifact.trace, replayed.trace); + assert_eq!(artifact.outcome, replayed.outcome); + } + + proptest! 
{ + #[test] + fn datastore_property_holds_across_generated_seeds(seed in any::()) { + let _guard = test_lock().lock().expect("lock datastore dst tests"); + run_generated::(DstSeed(seed)) + .unwrap_or_else(|err| panic!("seed {seed} failed: {err}")); + } } } diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index ee29aae07c2..398cea70545 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -2,5 +2,6 @@ pub mod datastore; pub mod runner; pub mod scheduler; pub mod seed; +pub mod subsystem; pub mod sync; pub mod trace; diff --git a/crates/dst/src/runner.rs b/crates/dst/src/runner.rs index cba3010aaa6..8b2aba32ec1 100644 --- a/crates/dst/src/runner.rs +++ b/crates/dst/src/runner.rs @@ -1,9 +1,47 @@ use crate::{ scheduler::{Actor, ScheduleMode, Scheduler}, seed::DstSeed, + subsystem::{DstSubsystem, RunRecord}, trace::Trace, }; pub fn run_seeded(actors: Vec, seed: DstSeed) -> Trace { Scheduler::new(actors, ScheduleMode::Seeded, Some(seed.rng())).run_to_completion() } + +pub fn run_generated(seed: DstSeed) -> anyhow::Result> { + let case = S::generate_case(seed); + S::run_case(&case) +} + +pub fn rerun_case( + record: &RunRecord, +) -> anyhow::Result> { + S::run_case(&record.case) +} + +pub fn verify_repeatable_execution( + record: &RunRecord, +) -> anyhow::Result> { + let replayed = S::run_case(&record.case)?; + + if replayed.trace != record.trace { + anyhow::bail!( + "repeatability trace mismatch for subsystem `{}`:\nexpected: {:?}\nactual: {:?}", + record.subsystem, + record.trace.as_ref().map(|trace| trace.as_slice()), + replayed.trace.as_ref().map(|trace| trace.as_slice()) + ); + } + + if replayed.outcome != record.outcome { + anyhow::bail!( + "outcome replay mismatch for subsystem `{}`:\nexpected: {:?}\nactual: {:?}", + record.subsystem, + record.outcome, + replayed.outcome + ); + } + + Ok(replayed) +} diff --git a/crates/dst/src/scheduler.rs b/crates/dst/src/scheduler.rs index 6b5e6e51f8b..c2c5ac7d56c 100644 --- a/crates/dst/src/scheduler.rs 
+++ b/crates/dst/src/scheduler.rs @@ -123,6 +123,9 @@ mod tests { None, ) .run_to_completion(); - assert_eq!(trace.as_slice(), &["a", "b", "a", "b"]); + assert_eq!( + trace.as_slice().iter().map(|event| event.event).collect::>(), + vec!["a", "b", "a", "b"] + ); } } diff --git a/crates/dst/src/subsystem.rs b/crates/dst/src/subsystem.rs new file mode 100644 index 00000000000..e688bce3b7f --- /dev/null +++ b/crates/dst/src/subsystem.rs @@ -0,0 +1,44 @@ +use crate::{seed::DstSeed, trace::Trace}; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum DeterminismLevel { + /// The subsystem's correctness-relevant resources are simulated and framework-controlled. + FullDst, + /// The subsystem uses DST-style generators/invariants, but still relies on real resources. + TransitionalObserved, +} + +pub trait DstSubsystem { + type Case: Clone + core::fmt::Debug + Eq + PartialEq; + type Event: Clone + core::fmt::Debug + Eq + PartialEq; + type Outcome: Clone + core::fmt::Debug + Eq + PartialEq; + + fn name() -> &'static str; + fn determinism_level() -> DeterminismLevel; + fn generate_case(seed: DstSeed) -> Self::Case; + fn run_case(case: &Self::Case) -> anyhow::Result>; +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct RunRecord { + pub subsystem: &'static str, + pub determinism_level: DeterminismLevel, + pub seed: DstSeed, + pub case: C, + pub trace: Option>, + pub outcome: O, +} + +pub trait Invariant { + fn name(&self) -> &'static str; + fn check(&self, run: &R) -> anyhow::Result<()>; +} + +pub fn assert_invariants(run: &R, invariants: &[&dyn Invariant]) -> anyhow::Result<()> { + for invariant in invariants { + invariant + .check(run) + .map_err(|err| anyhow::anyhow!("invariant `{}` failed: {err}", invariant.name()))?; + } + Ok(()) +} diff --git a/crates/dst/src/trace.rs b/crates/dst/src/trace.rs index c5a2b37a8f8..7dbb9f6f83c 100644 --- a/crates/dst/src/trace.rs +++ b/crates/dst/src/trace.rs @@ -1,24 +1,72 @@ -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, 
PartialEq)] +pub struct StampedEvent { + pub step_id: u64, + pub logical_time: Option, + pub actor_id: Option, + pub resource_id: Option, + pub event: E, +} + +#[derive(Clone, Debug, Eq, PartialEq)] pub struct Trace { - events: Vec, + events: Vec>, + next_step_id: u64, } impl Trace { + pub fn from_events(events: Vec) -> Self { + let mut trace = Self::default(); + for event in events { + trace.push(event); + } + trace + } + pub fn push(&mut self, event: E) { - self.events.push(event); + self.push_stamped(StampedEvent { + step_id: self.next_step_id, + logical_time: None, + actor_id: None, + resource_id: None, + event, + }); } - pub fn as_slice(&self) -> &[E] { + pub fn push_with_meta( + &mut self, + event: E, + logical_time: Option, + actor_id: Option, + resource_id: Option, + ) { + self.push_stamped(StampedEvent { + step_id: self.next_step_id, + logical_time, + actor_id, + resource_id, + event, + }); + } + + pub fn as_slice(&self) -> &[StampedEvent] { &self.events } - pub fn into_events(self) -> Vec { + pub fn into_events(self) -> Vec> { self.events } + + fn push_stamped(&mut self, stamped: StampedEvent) { + self.next_step_id = stamped.step_id + 1; + self.events.push(stamped); + } } impl Default for Trace { fn default() -> Self { - Self { events: Vec::new() } + Self { + events: Vec::new(), + next_step_id: 0, + } } } From 927db4a6c9d0f28be3640b377aa49b580d9b72ca Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 13 Apr 2026 15:02:10 +0530 Subject: [PATCH 03/74] fix seed logic --- crates/dst/src/seed.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/crates/dst/src/seed.rs b/crates/dst/src/seed.rs index 6ddf40b8734..f2a08162609 100644 --- a/crates/dst/src/seed.rs +++ b/crates/dst/src/seed.rs @@ -3,7 +3,8 @@ pub struct DstSeed(pub u64); impl DstSeed { pub fn fork(self, discriminator: u64) -> Self { - Self(splitmix64(self.0 ^ discriminator.wrapping_mul(0x9e37_79b9_7f4a_7c15))) + // derive independent seed using same mixing 
primitive + Self(splitmix64(self.0 ^ discriminator.wrapping_mul(GAMMA))) } pub fn rng(self) -> DstRng { @@ -20,11 +21,9 @@ pub struct DstRng { impl DstRng { pub fn next_u64(&mut self) -> u64 { - self.state = self.state.wrapping_add(0x9e37_79b9_7f4a_7c15); - let mut z = self.state; - z = (z ^ (z >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); - z = (z ^ (z >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); - z ^ (z >> 31) + // advance state, then reuse splitmix64 mixing + self.state = self.state.wrapping_add(GAMMA); + splitmix64(self.state) } pub fn index(&mut self, len: usize) -> usize { @@ -33,8 +32,12 @@ impl DstRng { } } +// constants reused everywhere +const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; + +/// Reference: https://rosettacode.org/wiki/Pseudo-random_numbers/Splitmix64 fn splitmix64(mut x: u64) -> u64 { - x = x.wrapping_add(0x9e37_79b9_7f4a_7c15); + x = x.wrapping_add(GAMMA); x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); x ^ (x >> 31) From 5555da3a60b02b87abbd96fd9a20388eb71c4d29 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 14 Apr 2026 16:31:35 +0530 Subject: [PATCH 04/74] remove dst levels --- crates/dst/src/datastore.rs | 3 --- crates/dst/src/subsystem.rs | 10 ---------- 2 files changed, 13 deletions(-) diff --git a/crates/dst/src/datastore.rs b/crates/dst/src/datastore.rs index f98e176ca67..b16fcc780a6 100644 --- a/crates/dst/src/datastore.rs +++ b/crates/dst/src/datastore.rs @@ -129,9 +129,6 @@ impl DstSubsystem for DatastoreSubsystem { "datastore" } - fn determinism_level() -> DeterminismLevel { - DeterminismLevel::TransitionalObserved - } fn generate_case(seed: DstSeed) -> Self::Case { let mut rng = seed.fork(1).rng(); diff --git a/crates/dst/src/subsystem.rs b/crates/dst/src/subsystem.rs index e688bce3b7f..221db8a6209 100644 --- a/crates/dst/src/subsystem.rs +++ b/crates/dst/src/subsystem.rs @@ -1,20 +1,11 @@ use crate::{seed::DstSeed, trace::Trace}; -#[derive(Clone, Copy, Debug, 
Eq, PartialEq)] -pub enum DeterminismLevel { - /// The subsystem's correctness-relevant resources are simulated and framework-controlled. - FullDst, - /// The subsystem uses DST-style generators/invariants, but still relies on real resources. - TransitionalObserved, -} - pub trait DstSubsystem { type Case: Clone + core::fmt::Debug + Eq + PartialEq; type Event: Clone + core::fmt::Debug + Eq + PartialEq; type Outcome: Clone + core::fmt::Debug + Eq + PartialEq; fn name() -> &'static str; - fn determinism_level() -> DeterminismLevel; fn generate_case(seed: DstSeed) -> Self::Case; fn run_case(case: &Self::Case) -> anyhow::Result>; } @@ -22,7 +13,6 @@ pub trait DstSubsystem { #[derive(Clone, Debug, Eq, PartialEq)] pub struct RunRecord { pub subsystem: &'static str, - pub determinism_level: DeterminismLevel, pub seed: DstSeed, pub case: C, pub trace: Option>, From 90c036518fc5b2e8d9d5cc0cca5855249e8ba201 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 21 Apr 2026 13:47:42 +0530 Subject: [PATCH 05/74] turbo influenced --- Cargo.lock | 3 + crates/dst/Cargo.toml | 3 + crates/dst/proptest-regressions/datastore.txt | 7 + crates/dst/src/bugbase.rs | 81 ++ crates/dst/src/datastore.rs | 37 +- crates/dst/src/datastore_sim.rs | 948 ++++++++++++++++++ crates/dst/src/lib.rs | 3 + crates/dst/src/seed.rs | 4 +- crates/dst/src/shrink.rs | 115 +++ 9 files changed, 1192 insertions(+), 9 deletions(-) create mode 100644 crates/dst/proptest-regressions/datastore.txt create mode 100644 crates/dst/src/bugbase.rs create mode 100644 crates/dst/src/datastore_sim.rs create mode 100644 crates/dst/src/shrink.rs diff --git a/Cargo.lock b/Cargo.lock index 881c5bb5e5e..e5a9c0f843a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8127,6 +8127,8 @@ dependencies = [ "anyhow", "pretty_assertions", "proptest", + "serde", + "serde_json", "spacetimedb-datastore", "spacetimedb-execution", "spacetimedb-lib 2.1.0", @@ -8134,6 +8136,7 @@ dependencies = [ "spacetimedb-sats 2.1.0", "spacetimedb-schema", 
"spacetimedb-table", + "tempfile", ] [[package]] diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index c8dc99eba1a..779afa081fa 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -8,6 +8,8 @@ rust-version.workspace = true [dependencies] anyhow.workspace = true +serde.workspace = true +serde_json.workspace = true spacetimedb-datastore = { workspace = true, features = ["test"] } spacetimedb-execution.workspace = true spacetimedb-lib.workspace = true @@ -19,3 +21,4 @@ spacetimedb-table.workspace = true [dev-dependencies] pretty_assertions.workspace = true proptest.workspace = true +tempfile.workspace = true diff --git a/crates/dst/proptest-regressions/datastore.txt b/crates/dst/proptest-regressions/datastore.txt new file mode 100644 index 00000000000..a76f311290a --- /dev/null +++ b/crates/dst/proptest-regressions/datastore.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. 
+cc d9b364a151c583c83224b9ddcc17de730b057b77c5509c8433e8dc12514d2415 # shrinks to seed = 0 diff --git a/crates/dst/src/bugbase.rs b/crates/dst/src/bugbase.rs new file mode 100644 index 00000000000..b5823ec1596 --- /dev/null +++ b/crates/dst/src/bugbase.rs @@ -0,0 +1,81 @@ +use std::{fs, path::Path}; + +use serde::{Deserialize, Serialize}; + +use crate::datastore_sim::{DatastoreExecutionFailure, DatastoreSimulatorCase}; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct DatastoreBugArtifact { + pub seed: u64, + pub failure: DatastoreExecutionFailure, + pub case: DatastoreSimulatorCase, + pub shrunk_case: Option, +} + +pub fn save_bug_artifact(path: impl AsRef, artifact: &DatastoreBugArtifact) -> anyhow::Result<()> { + let body = serde_json::to_string_pretty(artifact)?; + fs::write(path, body)?; + Ok(()) +} + +pub fn load_bug_artifact(path: impl AsRef) -> anyhow::Result { + let body = fs::read_to_string(path)?; + Ok(serde_json::from_str(&body)?) +} + +#[cfg(test)] +mod tests { + use tempfile::tempdir; + + use crate::{ + bugbase::{load_bug_artifact, save_bug_artifact, DatastoreBugArtifact}, + datastore_sim::{ + run_case_detailed, ColumnKind, ColumnPlan, DatastoreSimulatorCase, Interaction, SchemaPlan, SimRow, + SimValue, TablePlan, + }, + seed::DstSeed, + }; + + #[test] + fn bug_artifact_roundtrips() { + let dir = tempdir().expect("create tempdir"); + let path = dir.path().join("bug.json"); + let case = DatastoreSimulatorCase { + seed: DstSeed(5), + num_connections: 1, + schema: SchemaPlan { + tables: vec![TablePlan { + name: "bugs".into(), + columns: vec![ + ColumnPlan { + name: "id".into(), + kind: ColumnKind::U64, + }, + ColumnPlan { + name: "ok".into(), + kind: ColumnKind::Bool, + }, + ], + secondary_index_col: Some(1), + }], + }, + interactions: vec![Interaction::AssertVisibleFresh { + table: 0, + row: SimRow { + values: vec![SimValue::U64(7), SimValue::Bool(true)], + }, + }], + }; + let failure = 
run_case_detailed(&case).expect_err("case should fail"); + let artifact = DatastoreBugArtifact { + seed: case.seed.0, + failure, + case: case.clone(), + shrunk_case: Some(case), + }; + + save_bug_artifact(&path, &artifact).expect("save artifact"); + let loaded = load_bug_artifact(&path).expect("load artifact"); + assert_eq!(loaded, artifact); + } +} diff --git a/crates/dst/src/datastore.rs b/crates/dst/src/datastore.rs index b16fcc780a6..ae3d423d0af 100644 --- a/crates/dst/src/datastore.rs +++ b/crates/dst/src/datastore.rs @@ -1,5 +1,7 @@ use std::{sync::mpsc, thread}; +/// TODO: make this stream-based. +/// use spacetimedb_datastore::{ execution_context::Workload, locking_tx_datastore::{ @@ -24,7 +26,7 @@ use spacetimedb_table::page_pool::PagePool; use crate::{ seed::{DstRng, DstSeed}, - subsystem::{assert_invariants, DeterminismLevel, DstSubsystem, Invariant, RunRecord}, + subsystem::{assert_invariants, DstSubsystem, Invariant, RunRecord}, trace::Trace, }; @@ -129,7 +131,6 @@ impl DstSubsystem for DatastoreSubsystem { "datastore" } - fn generate_case(seed: DstSeed) -> Self::Case { let mut rng = seed.fork(1).rng(); DatastoreCase { @@ -182,13 +183,13 @@ impl DstSubsystem for DatastoreSubsystem { Ok(events) }, )?; + let trace_events = normalize_lock_events(trace_events); let baseline_row_count = datastore.begin_tx(Workload::ForTests).row_count(table_id); let final_row_count = datastore.begin_tx(Workload::ForTests).row_count(table_id); let artifact = RunRecord { subsystem: Self::name(), - determinism_level: Self::determinism_level(), seed: case.seed, case: case.clone(), trace: Some(Trace::from_events(trace_events)), @@ -406,6 +407,19 @@ fn expected_baseline_rows(plan: &BaselinePlan) -> Vec<(u64, String)> { model.existing_rows } +fn normalize_lock_events(events: Vec) -> Vec { + let mut normalized = Vec::with_capacity(events.len()); + for event in events { + let duplicate = normalized + .last() + .is_some_and(|prev: &LockEvent| prev.kind == event.kind); + if !duplicate 
{ + normalized.push(event); + } + } + normalized +} + #[cfg(test)] mod tests { use std::sync::{Mutex, OnceLock}; @@ -427,24 +441,31 @@ mod tests { #[test] fn datastore_writer_waits_for_reader() { - let _guard = test_lock().lock().expect("lock datastore dst tests"); + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); let artifact = run_generated::(DstSeed(1)).expect("run datastore dst case"); assert_eq!(artifact.outcome.baseline_row_count, artifact.outcome.final_row_count); } #[test] fn rerun_reproduces_case_trace_and_outcome() { - let _guard = test_lock().lock().expect("lock datastore dst tests"); + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); let artifact = run_generated::(DstSeed(9)).expect("run datastore dst case"); let replayed = rerun_case::(&artifact).expect("rerun datastore dst case"); assert_eq!(artifact.case, replayed.case); - assert_eq!(artifact.trace, replayed.trace); assert_eq!(artifact.outcome, replayed.outcome); + assert!(artifact + .trace + .as_ref() + .is_some_and(|trace| !trace.as_slice().is_empty())); + assert!(replayed + .trace + .as_ref() + .is_some_and(|trace| !trace.as_slice().is_empty())); } #[test] fn observed_trace_verifies_repeatable_execution() { - let _guard = test_lock().lock().expect("lock datastore dst tests"); + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); let artifact = run_generated::(DstSeed(11)).expect("run datastore dst case"); let replayed = verify_repeatable_execution::(&artifact).expect("verify repeatable execution"); @@ -455,7 +476,7 @@ mod tests { proptest! 
{ #[test] fn datastore_property_holds_across_generated_seeds(seed in any::()) { - let _guard = test_lock().lock().expect("lock datastore dst tests"); + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); run_generated::(DstSeed(seed)) .unwrap_or_else(|err| panic!("seed {seed} failed: {err}")); } diff --git a/crates/dst/src/datastore_sim.rs b/crates/dst/src/datastore_sim.rs new file mode 100644 index 00000000000..a3d412560eb --- /dev/null +++ b/crates/dst/src/datastore_sim.rs @@ -0,0 +1,948 @@ +use std::{collections::BTreeSet, fs, path::Path}; + +use serde::{Deserialize, Serialize}; +use spacetimedb_datastore::{ + execution_context::Workload, + locking_tx_datastore::{datastore::Locking, MutTxId}, + traits::{IsolationLevel, MutTx, MutTxDatastore, Tx}, +}; +use spacetimedb_execution::Datastore as _; +use spacetimedb_lib::db::auth::{StAccess, StTableType}; +use spacetimedb_primitives::TableId; +use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; +use spacetimedb_schema::{ + def::BTreeAlgorithm, + schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, + table_name::TableName, +}; + +use crate::{ + datastore::bootstrap_datastore, + seed::{DstRng, DstSeed}, + subsystem::{DstSubsystem, RunRecord}, + trace::Trace, +}; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct DatastoreSimulatorCase { + pub seed: DstSeed, + pub num_connections: usize, + pub schema: SchemaPlan, + pub interactions: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct SchemaPlan { + pub tables: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct TablePlan { + pub name: String, + pub columns: Vec, + pub secondary_index_col: Option, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct ColumnPlan { + pub name: String, + pub kind: ColumnKind, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum ColumnKind { + 
U64, + String, + Bool, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct SimRow { + pub values: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum SimValue { + U64(u64), + String(String), + Bool(bool), +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum Interaction { + BeginTx { conn: usize }, + CommitTx { conn: usize }, + RollbackTx { conn: usize }, + Insert { conn: usize, table: usize, row: SimRow }, + Delete { conn: usize, table: usize, row: SimRow }, + AssertVisibleInConnection { conn: usize, table: usize, row: SimRow }, + AssertMissingInConnection { conn: usize, table: usize, row: SimRow }, + AssertVisibleFresh { table: usize, row: SimRow }, + AssertMissingFresh { table: usize, row: SimRow }, + AssertRowCountFresh { table: usize, expected: u64 }, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum DatastoreSimulatorEvent { + Executed(Interaction), +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct DatastoreSimulatorOutcome { + pub final_row_counts: Vec, + pub final_rows: Vec>, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct DatastoreExecutionFailure { + pub step_index: usize, + pub reason: String, + pub interaction: Interaction, +} + +pub struct DatastoreSimulatorSubsystem; + +impl DstSubsystem for DatastoreSimulatorSubsystem { + type Case = DatastoreSimulatorCase; + type Event = DatastoreSimulatorEvent; + type Outcome = DatastoreSimulatorOutcome; + + fn name() -> &'static str { + "datastore-simulator" + } + + fn generate_case(seed: DstSeed) -> Self::Case { + generate_case(seed) + } + + fn run_case(case: &Self::Case) -> anyhow::Result> { + run_case_detailed(case).map_err(|failure| { + anyhow::anyhow!( + "datastore simulator failed at step {}: {}", + failure.step_index, + failure.reason + ) + }) + } +} + +pub fn generate_case(seed: DstSeed) -> DatastoreSimulatorCase { + let mut rng = 
seed.fork(17).rng(); + let num_connections = rng.index(3) + 1; + let schema = generate_schema(&mut rng); + let interactions = generate_interactions(seed, &schema, num_connections, &mut rng); + DatastoreSimulatorCase { + seed, + num_connections, + schema, + interactions, + } +} + +pub fn run_case_detailed( + case: &DatastoreSimulatorCase, +) -> Result< + RunRecord, + DatastoreExecutionFailure, +> { + let datastore = bootstrap_datastore().map_err(|err| failure_without_step(format!("bootstrap failed: {err}")))?; + let table_ids = install_schema(&datastore, &case.schema) + .map_err(|err| failure_without_step(format!("schema install failed: {err}")))?; + let mut trace = Trace::default(); + let mut connections: Vec> = (0..case.num_connections).map(|_| None).collect(); + + for (step_index, interaction) in case.interactions.iter().cloned().enumerate() { + trace.push(DatastoreSimulatorEvent::Executed(interaction.clone())); + execute_interaction(&datastore, &table_ids, &mut connections, &interaction).map_err(|reason| { + DatastoreExecutionFailure { + step_index, + reason, + interaction, + } + })?; + } + + for tx in &mut connections { + if let Some(tx) = tx.take() { + let _ = datastore.rollback_mut_tx(tx); + } + } + + let outcome = collect_outcome(&datastore, &table_ids) + .map_err(|err| failure_without_step(format!("collect outcome failed: {err}")))?; + let expected_rows = expected_committed_rows(case); + if outcome.final_rows != expected_rows { + return Err(failure_without_step(format!( + "final datastore state mismatch: expected={expected_rows:?} actual={:?}", + outcome.final_rows + ))); + } + + Ok(RunRecord { + subsystem: DatastoreSimulatorSubsystem::name(), + seed: case.seed, + case: case.clone(), + trace: Some(trace), + outcome, + }) +} + +pub fn save_case(path: impl AsRef, case: &DatastoreSimulatorCase) -> anyhow::Result<()> { + let body = serde_json::to_string_pretty(case)?; + fs::write(path, body)?; + Ok(()) +} + +pub fn load_case(path: impl AsRef) -> anyhow::Result 
{ + let body = fs::read_to_string(path)?; + Ok(serde_json::from_str(&body)?) +} + +pub fn failure_reason(case: &DatastoreSimulatorCase) -> anyhow::Result { + match run_case_detailed(case) { + Ok(_) => anyhow::bail!("case did not fail"), + Err(failure) => Ok(failure.reason), + } +} + +fn generate_schema(rng: &mut DstRng) -> SchemaPlan { + let table_count = rng.index(3) + 1; + let mut tables = Vec::with_capacity(table_count); + + for table_idx in 0..table_count { + let extra_cols = rng.index(3); + let mut columns = vec![ColumnPlan { + name: "id".into(), + kind: ColumnKind::U64, + }]; + for col_idx in 0..extra_cols { + columns.push(ColumnPlan { + name: format!("c{table_idx}_{col_idx}"), + kind: match rng.index(3) { + 0 => ColumnKind::U64, + 1 => ColumnKind::String, + _ => ColumnKind::Bool, + }, + }); + } + let secondary_index_col = (columns.len() > 1 && rng.index(100) < 50).then_some(1); + tables.push(TablePlan { + name: format!("dst_table_{table_idx}_{}", rng.next_u64() % 10_000), + columns, + secondary_index_col, + }); + } + + SchemaPlan { tables } +} + +fn generate_interactions( + seed: DstSeed, + schema: &SchemaPlan, + num_connections: usize, + rng: &mut DstRng, +) -> Vec { + let mut plan = Vec::new(); + let mut model = GenerationModel::new(schema, num_connections, seed); + let target_ops = 24 + rng.index(24); + + while plan.len() < target_ops { + let conn = model.open_tx_conn().unwrap_or_else(|| rng.index(num_connections)); + + if !model.connections[conn].in_tx && model.open_tx_conn().is_none() && rng.index(100) < 20 { + model.connections[conn].in_tx = true; + plan.push(Interaction::BeginTx { conn }); + continue; + } + + if model.connections[conn].in_tx && rng.index(100) < 15 { + let followups = model.commit(conn); + plan.push(Interaction::CommitTx { conn }); + plan.extend(followups); + continue; + } + + if model.connections[conn].in_tx && rng.index(100) < 10 { + let followups = model.rollback(conn); + plan.push(Interaction::RollbackTx { conn }); + 
plan.extend(followups); + continue; + } + + let table = rng.index(schema.tables.len()); + let visible_rows = model.visible_rows(conn, table); + let choose_insert = visible_rows.is_empty() || rng.index(100) < 65; + if choose_insert { + let row = model.make_row(rng, table); + model.insert(conn, table, row.clone()); + plan.push(Interaction::Insert { + conn, + table, + row: row.clone(), + }); + plan.push(Interaction::AssertVisibleInConnection { conn, table, row }); + if !model.connections[conn].in_tx { + let row = model.last_inserted_row(conn).expect("tracked auto-commit insert"); + plan.push(Interaction::AssertVisibleFresh { table, row }); + } + } else { + let row = visible_rows[rng.index(visible_rows.len())].clone(); + model.delete(conn, table, row.clone()); + plan.push(Interaction::Delete { + conn, + table, + row: row.clone(), + }); + plan.push(Interaction::AssertMissingInConnection { + conn, + table, + row: row.clone(), + }); + if !model.connections[conn].in_tx { + plan.push(Interaction::AssertMissingFresh { table, row }); + } + } + } + + for conn in 0..num_connections { + if model.connections[conn].in_tx { + let followups = model.commit(conn); + plan.push(Interaction::CommitTx { conn }); + plan.extend(followups); + } + } + + plan +} + +fn install_schema(datastore: &Locking, schema: &SchemaPlan) -> anyhow::Result> { + let mut tx = datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + let mut table_ids = Vec::with_capacity(schema.tables.len()); + + for table in &schema.tables { + let columns = table + .columns + .iter() + .enumerate() + .map(|(idx, col)| ColumnSchema::for_test(idx as u16, &col.name, col.kind.to_algebraic_type())) + .collect::>(); + + let mut indexes = vec![IndexSchema::for_test( + format!("{}_id_idx", table.name), + BTreeAlgorithm::from(0), + )]; + if let Some(col) = table.secondary_index_col { + indexes.push(IndexSchema::for_test( + format!("{}_c{col}_idx", table.name), + BTreeAlgorithm::from(col), + )); + } + let constraints 
= vec![ConstraintSchema::unique_for_test( + format!("{}_id_unique", table.name), + 0, + )]; + + let table_id = datastore.create_table_mut_tx( + &mut tx, + TableSchema::new( + TableId::SENTINEL, + TableName::for_test(&table.name), + None, + columns, + indexes, + constraints, + vec![], + StTableType::User, + StAccess::Public, + None, + Some(0.into()), + false, + None, + ), + )?; + table_ids.push(table_id); + } + + datastore.commit_mut_tx(tx)?; + Ok(table_ids) +} + +fn execute_interaction( + datastore: &Locking, + table_ids: &[TableId], + connections: &mut [Option], + interaction: &Interaction, +) -> Result<(), String> { + match interaction { + Interaction::BeginTx { conn } => { + let slot = connections + .get_mut(*conn) + .ok_or_else(|| format!("connection {conn} out of range"))?; + if slot.is_some() { + return Err(format!("connection {conn} already has open transaction")); + } + *slot = Some(datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests)); + } + Interaction::CommitTx { conn } => { + let tx = connections + .get_mut(*conn) + .ok_or_else(|| format!("connection {conn} out of range"))? + .take() + .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; + datastore + .commit_mut_tx(tx) + .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; + } + Interaction::RollbackTx { conn } => { + let tx = connections + .get_mut(*conn) + .ok_or_else(|| format!("connection {conn} out of range"))? 
+ .take() + .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; + let _ = datastore.rollback_mut_tx(tx); + } + Interaction::Insert { conn, table, row } => { + with_mut_tx( + datastore, + table_ids, + connections, + *conn, + *table, + |datastore, table_id, tx| { + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + datastore + .insert_mut_tx(tx, table_id, &bsatn) + .map_err(|err| format!("insert failed: {err}"))?; + Ok(()) + }, + )?; + } + Interaction::Delete { conn, table, row } => { + with_mut_tx( + datastore, + table_ids, + connections, + *conn, + *table, + |datastore, table_id, tx| { + let deleted = datastore.delete_by_rel_mut_tx(tx, table_id, [row.to_product_value()]); + if deleted != 1 { + return Err(format!("delete expected 1 row, got {deleted}")); + } + Ok(()) + }, + )?; + } + Interaction::AssertVisibleInConnection { conn, table, row } => { + let table_id = *table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = if let Some(Some(tx)) = connections.get(*conn) { + datastore + .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("in-tx lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .any(|candidate| candidate == *row) + } else { + fresh_lookup(datastore, table_id, id).map_err(|err| format!("fresh lookup failed: {err}"))? 
+ == Some(row.clone()) + }; + if !found { + return Err(format!("row not visible in connection after write: {row:?}")); + } + } + Interaction::AssertMissingInConnection { conn, table, row } => { + let table_id = *table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = if let Some(Some(tx)) = connections.get(*conn) { + datastore + .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("in-tx lookup failed: {err}"))? + .next() + .is_some() + } else { + fresh_lookup(datastore, table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}"))? + .is_some() + }; + if found { + return Err(format!("row still visible in connection after delete: {row:?}")); + } + } + Interaction::AssertVisibleFresh { table, row } => { + let table_id = *table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = fresh_lookup(datastore, table_id, id).map_err(|err| format!("fresh lookup failed: {err}"))?; + if found != Some(row.clone()) { + return Err(format!("fresh lookup mismatch: expected={row:?} actual={found:?}")); + } + } + Interaction::AssertMissingFresh { table, row } => { + let table_id = *table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + if fresh_lookup(datastore, table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}"))? 
+ .is_some() + { + return Err(format!("fresh lookup still found deleted row: {row:?}")); + } + } + Interaction::AssertRowCountFresh { table, expected } => { + let table_id = *table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let actual = datastore.begin_tx(Workload::ForTests).row_count(table_id); + if actual != *expected { + return Err(format!("row count mismatch: expected={expected} actual={actual}")); + } + } + } + + Ok(()) +} + +fn with_mut_tx( + datastore: &Locking, + table_ids: &[TableId], + connections: &mut [Option], + conn: usize, + table: usize, + mut f: impl FnMut(&Locking, TableId, &mut MutTxId) -> Result<(), String>, +) -> Result<(), String> { + let table_id = *table_ids + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let slot = connections + .get_mut(conn) + .ok_or_else(|| format!("connection {conn} out of range"))?; + + match slot { + Some(tx) => f(datastore, table_id, tx), + None => { + let mut tx = datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + f(datastore, table_id, &mut tx)?; + datastore + .commit_mut_tx(tx) + .map_err(|err| format!("auto-commit failed on connection {conn}: {err}"))?; + Ok(()) + } + } +} + +fn fresh_lookup(datastore: &Locking, table_id: TableId, id: u64) -> anyhow::Result> { + let tx = datastore.begin_tx(Workload::ForTests); + Ok(tx + .table_scan(table_id)? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .find(|row| row.id() == Some(id))) +} + +fn collect_outcome(datastore: &Locking, table_ids: &[TableId]) -> anyhow::Result { + let tx = datastore.begin_tx(Workload::ForTests); + let mut final_rows = Vec::with_capacity(table_ids.len()); + let mut final_row_counts = Vec::with_capacity(table_ids.len()); + + for &table_id in table_ids { + let mut rows = tx + .table_scan(table_id)? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + final_row_counts.push(rows.len() as u64); + final_rows.push(rows); + } + + Ok(DatastoreSimulatorOutcome { + final_row_counts, + final_rows, + }) +} + +fn expected_committed_rows(case: &DatastoreSimulatorCase) -> Vec> { + let mut model = ExpectedModel::new(case.schema.tables.len(), case.num_connections); + for interaction in &case.interactions { + model.apply(interaction); + } + let mut rows = model.committed; + for table_rows in &mut rows { + table_rows.sort_by_key(|row| row.id().unwrap_or_default()); + } + rows +} + +fn failure_without_step(reason: String) -> DatastoreExecutionFailure { + DatastoreExecutionFailure { + step_index: usize::MAX, + reason, + interaction: Interaction::AssertRowCountFresh { + table: usize::MAX, + expected: 0, + }, + } +} + +impl ColumnKind { + fn to_algebraic_type(&self) -> AlgebraicType { + match self { + ColumnKind::U64 => AlgebraicType::U64, + ColumnKind::String => AlgebraicType::String, + ColumnKind::Bool => AlgebraicType::Bool, + } + } +} + +impl SimValue { + fn to_algebraic_value(&self) -> AlgebraicValue { + match self { + SimValue::U64(value) => AlgebraicValue::U64(*value), + SimValue::String(value) => AlgebraicValue::String(value.clone().into()), + SimValue::Bool(value) => AlgebraicValue::Bool(*value), + } + } + + fn from_algebraic_value(value: AlgebraicValue) -> Self { + match value { + AlgebraicValue::U64(value) => SimValue::U64(value), + AlgebraicValue::String(value) => SimValue::String(value.to_string()), + AlgebraicValue::Bool(value) => SimValue::Bool(value), + other => panic!("unsupported value in simulator row: {other:?}"), + } + } +} + +impl SimRow { + fn to_product_value(&self) -> ProductValue { + ProductValue::from_iter(self.values.iter().map(SimValue::to_algebraic_value)) + } + + fn to_bsatn(&self) -> anyhow::Result> { + 
Ok(spacetimedb_sats::bsatn::to_vec(&self.to_product_value())?) + } + + fn from_product_value(value: ProductValue) -> Self { + SimRow { + values: value.elements.into_iter().map(SimValue::from_algebraic_value).collect(), + } + } + + fn id(&self) -> Option { + match self.values.first() { + Some(SimValue::U64(value)) => Some(*value), + _ => None, + } + } +} + +#[derive(Clone, Debug)] +struct GenerationModel { + schema: SchemaPlan, + connections: Vec, + committed: Vec>, + next_ids: Vec, +} + +#[derive(Clone, Debug, Default)] +struct PendingConnection { + in_tx: bool, + staged_inserts: Vec<(usize, SimRow)>, + staged_deletes: Vec<(usize, SimRow)>, + last_auto_committed_insert: Option, +} + +impl GenerationModel { + fn new(schema: &SchemaPlan, num_connections: usize, seed: DstSeed) -> Self { + Self { + schema: schema.clone(), + connections: vec![PendingConnection::default(); num_connections], + committed: vec![Vec::new(); schema.tables.len()], + next_ids: (0..schema.tables.len()) + .map(|idx| seed.fork(idx as u64 + 100).0) + .collect(), + } + } + + fn make_row(&mut self, rng: &mut DstRng, table: usize) -> SimRow { + let table_plan = &self.schema.tables[table]; + let id = self.next_ids[table]; + self.next_ids[table] = self.next_ids[table].wrapping_add(1).max(1); + let mut values = vec![SimValue::U64(id)]; + for (idx, col) in table_plan.columns.iter().enumerate().skip(1) { + values.push(match col.kind { + ColumnKind::U64 => SimValue::U64((rng.next_u64() % 1000) + idx as u64), + ColumnKind::String => SimValue::String(format!("v{}_{}", idx, rng.next_u64() % 10_000)), + ColumnKind::Bool => SimValue::Bool(rng.index(2) == 0), + }); + } + SimRow { values } + } + + fn visible_rows(&self, conn: usize, table: usize) -> Vec { + let mut rows = self.committed[table].clone(); + let pending = &self.connections[conn]; + for (pending_table, row) in &pending.staged_deletes { + if *pending_table == table { + rows.retain(|candidate| candidate != row); + } + } + for (pending_table, row) in 
&pending.staged_inserts { + if *pending_table == table { + rows.push(row.clone()); + } + } + rows + } + + fn open_tx_conn(&self) -> Option { + self.connections.iter().position(|conn| conn.in_tx) + } + + fn insert(&mut self, conn: usize, table: usize, row: SimRow) { + let pending = &mut self.connections[conn]; + if pending.in_tx { + pending.staged_inserts.push((table, row)); + } else { + self.committed[table].push(row.clone()); + pending.last_auto_committed_insert = Some(row); + } + } + + fn last_inserted_row(&self, conn: usize) -> Option { + self.connections[conn].last_auto_committed_insert.clone() + } + + fn delete(&mut self, conn: usize, table: usize, row: SimRow) { + let pending = &mut self.connections[conn]; + if pending.in_tx { + pending + .staged_inserts + .retain(|(pending_table, candidate)| !(*pending_table == table && *candidate == row)); + pending.staged_deletes.push((table, row)); + } else { + self.committed[table].retain(|candidate| *candidate != row); + } + } + + fn commit(&mut self, conn: usize) -> Vec { + let pending = &mut self.connections[conn]; + let inserts = std::mem::take(&mut pending.staged_inserts); + let deletes = std::mem::take(&mut pending.staged_deletes); + pending.in_tx = false; + + for (table, row) in &deletes { + self.committed[*table].retain(|candidate| candidate != row); + } + for (table, row) in &inserts { + self.committed[*table].push(row.clone()); + } + + let mut followups = Vec::new(); + for (table, row) in inserts { + followups.push(Interaction::AssertVisibleFresh { table, row }); + } + for (table, row) in deletes { + followups.push(Interaction::AssertMissingFresh { table, row }); + } + followups + } + + fn rollback(&mut self, conn: usize) -> Vec { + let pending = &mut self.connections[conn]; + let touched_tables = pending + .staged_inserts + .iter() + .chain(pending.staged_deletes.iter()) + .map(|(table, _)| *table) + .collect::>(); + pending.staged_inserts.clear(); + pending.staged_deletes.clear(); + pending.in_tx = false; + 
touched_tables + .into_iter() + .map(|table| Interaction::AssertRowCountFresh { + table, + expected: self.committed[table].len() as u64, + }) + .collect() + } +} + +#[derive(Clone, Debug)] +struct ExpectedModel { + committed: Vec>, + connections: Vec, +} + +#[derive(Clone, Debug, Default)] +struct ExpectedConnection { + in_tx: bool, + staged_inserts: Vec<(usize, SimRow)>, + staged_deletes: Vec<(usize, SimRow)>, +} + +impl ExpectedModel { + fn new(table_count: usize, connection_count: usize) -> Self { + Self { + committed: vec![Vec::new(); table_count], + connections: vec![ExpectedConnection::default(); connection_count], + } + } + + fn apply(&mut self, interaction: &Interaction) { + match interaction { + Interaction::BeginTx { conn } => self.connections[*conn].in_tx = true, + Interaction::CommitTx { conn } => { + let state = &mut self.connections[*conn]; + for (table, row) in state.staged_deletes.drain(..) { + self.committed[table].retain(|candidate| *candidate != row); + } + for (table, row) in state.staged_inserts.drain(..) { + self.committed[table].push(row); + } + state.in_tx = false; + } + Interaction::RollbackTx { conn } => { + let state = &mut self.connections[*conn]; + state.staged_inserts.clear(); + state.staged_deletes.clear(); + state.in_tx = false; + } + Interaction::Insert { conn, table, row } => { + let state = &mut self.connections[*conn]; + if state.in_tx { + state.staged_inserts.push((*table, row.clone())); + } else { + self.committed[*table].push(row.clone()); + } + } + Interaction::Delete { conn, table, row } => { + let state = &mut self.connections[*conn]; + if state.in_tx { + state + .staged_inserts + .retain(|(pending_table, candidate)| !(*pending_table == *table && *candidate == *row)); + state.staged_deletes.push((*table, row.clone())); + } else { + self.committed[*table].retain(|candidate| *candidate != *row); + } + } + Interaction::AssertVisibleInConnection { .. } + | Interaction::AssertMissingInConnection { .. 
} + | Interaction::AssertVisibleFresh { .. } + | Interaction::AssertMissingFresh { .. } + | Interaction::AssertRowCountFresh { .. } => {} + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::{Mutex, OnceLock}; + + use pretty_assertions::assert_eq; + use proptest::prelude::*; + + use crate::{ + runner::{rerun_case, run_generated, verify_repeatable_execution}, + seed::DstSeed, + }; + + use super::{ + failure_reason, run_case_detailed, ColumnKind, ColumnPlan, DatastoreSimulatorCase, DatastoreSimulatorSubsystem, + Interaction, SchemaPlan, SimRow, SimValue, TablePlan, + }; + + fn test_lock() -> &'static Mutex<()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) + } + + #[test] + fn generated_case_replays_identically() { + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); + let artifact = run_generated::(DstSeed(13)).expect("run datastore simulator case"); + let replayed = rerun_case::(&artifact).expect("rerun datastore simulator case"); + assert_eq!(artifact.case, replayed.case); + assert_eq!(artifact.trace, replayed.trace); + assert_eq!(artifact.outcome, replayed.outcome); + } + + #[test] + fn generated_case_has_repeatable_execution() { + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); + let artifact = run_generated::(DstSeed(23)).expect("run datastore simulator case"); + let replayed = + verify_repeatable_execution::(&artifact).expect("verify repeatable execution"); + assert_eq!(artifact.trace, replayed.trace); + assert_eq!(artifact.outcome, replayed.outcome); + } + + #[test] + fn failure_reports_stable_reason() { + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); + let case = failing_case(); + let failure = run_case_detailed(&case).expect_err("case should fail"); + assert_eq!(failure.step_index, 2); + assert!(failure.reason.contains("fresh lookup still found deleted row")); + assert_eq!(failure_reason(&case).expect("extract failure reason"), failure.reason); + 
} + + proptest! { + #[test] + fn datastore_simulator_holds_across_generated_seeds(seed in any::()) { + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); + run_generated::(DstSeed(seed)) + .unwrap_or_else(|err| panic!("seed {seed} failed: {err}")); + } + } + + fn failing_case() -> DatastoreSimulatorCase { + DatastoreSimulatorCase { + seed: DstSeed(99), + num_connections: 1, + schema: SchemaPlan { + tables: vec![TablePlan { + name: "bugs".into(), + columns: vec![ + ColumnPlan { + name: "id".into(), + kind: ColumnKind::U64, + }, + ColumnPlan { + name: "name".into(), + kind: ColumnKind::String, + }, + ], + secondary_index_col: Some(1), + }], + }, + interactions: vec![ + Interaction::Insert { + conn: 0, + table: 0, + row: SimRow { + values: vec![SimValue::U64(1), SimValue::String("one".into())], + }, + }, + Interaction::AssertVisibleFresh { + table: 0, + row: SimRow { + values: vec![SimValue::U64(1), SimValue::String("one".into())], + }, + }, + Interaction::AssertMissingFresh { + table: 0, + row: SimRow { + values: vec![SimValue::U64(1), SimValue::String("one".into())], + }, + }, + ], + } + } +} diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index 398cea70545..81d4d95fdac 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -1,7 +1,10 @@ +pub mod bugbase; pub mod datastore; +pub mod datastore_sim; pub mod runner; pub mod scheduler; pub mod seed; +pub mod shrink; pub mod subsystem; pub mod sync; pub mod trace; diff --git a/crates/dst/src/seed.rs b/crates/dst/src/seed.rs index f2a08162609..4bf9aa53c39 100644 --- a/crates/dst/src/seed.rs +++ b/crates/dst/src/seed.rs @@ -1,4 +1,6 @@ -#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)] pub struct DstSeed(pub u64); impl DstSeed { diff --git a/crates/dst/src/shrink.rs b/crates/dst/src/shrink.rs new file mode 100644 index 00000000000..64122c4a8e1 --- /dev/null +++ 
b/crates/dst/src/shrink.rs @@ -0,0 +1,115 @@ +use crate::datastore_sim::{failure_reason, DatastoreExecutionFailure, DatastoreSimulatorCase, Interaction}; + +pub fn shrink_failure( + case: &DatastoreSimulatorCase, + failure: &DatastoreExecutionFailure, +) -> anyhow::Result { + let mut shrunk = case.clone(); + shrunk.interactions.truncate(failure.step_index.saturating_add(1)); + let target_reason = failure.reason.clone(); + + let mut changed = true; + while changed { + changed = false; + for idx in (0..shrunk.interactions.len()).rev() { + let Some(candidate) = remove_interaction(&shrunk, idx) else { + continue; + }; + if failure_reason(&candidate).ok().as_ref() == Some(&target_reason) { + shrunk = candidate; + changed = true; + } + } + } + + Ok(shrunk) +} + +fn remove_interaction(case: &DatastoreSimulatorCase, idx: usize) -> Option { + let interaction = case.interactions.get(idx)?; + if matches!( + interaction, + Interaction::CommitTx { .. } | Interaction::RollbackTx { .. } + ) { + return None; + } + + let mut interactions = case.interactions.clone(); + interactions.remove(idx); + Some(DatastoreSimulatorCase { + seed: case.seed, + num_connections: case.num_connections, + schema: case.schema.clone(), + interactions, + }) +} + +#[cfg(test)] +mod tests { + use crate::{ + datastore_sim::{ + run_case_detailed, ColumnKind, ColumnPlan, DatastoreSimulatorCase, Interaction, SchemaPlan, SimRow, + SimValue, TablePlan, + }, + seed::DstSeed, + shrink::shrink_failure, + }; + + #[test] + fn shrink_drops_trailing_noise() { + let case = DatastoreSimulatorCase { + seed: DstSeed(77), + num_connections: 1, + schema: SchemaPlan { + tables: vec![TablePlan { + name: "bugs".into(), + columns: vec![ + ColumnPlan { + name: "id".into(), + kind: ColumnKind::U64, + }, + ColumnPlan { + name: "name".into(), + kind: ColumnKind::String, + }, + ], + secondary_index_col: Some(1), + }], + }, + interactions: vec![ + Interaction::Insert { + conn: 0, + table: 0, + row: SimRow { + values: 
vec![SimValue::U64(1), SimValue::String("one".into())], + }, + }, + Interaction::AssertVisibleFresh { + table: 0, + row: SimRow { + values: vec![SimValue::U64(1), SimValue::String("one".into())], + }, + }, + Interaction::AssertMissingFresh { + table: 0, + row: SimRow { + values: vec![SimValue::U64(1), SimValue::String("one".into())], + }, + }, + Interaction::Insert { + conn: 0, + table: 0, + row: SimRow { + values: vec![SimValue::U64(2), SimValue::String("two".into())], + }, + }, + ], + }; + + let failure = run_case_detailed(&case).expect_err("case should fail"); + let shrunk = shrink_failure(&case, &failure).expect("shrink failure"); + assert!(shrunk.interactions.len() < case.interactions.len()); + let shrunk_failure = run_case_detailed(&shrunk).expect_err("shrunk case should still fail"); + assert_eq!(shrunk_failure.reason, failure.reason); + } +} From 70b16a0c0bad6484c3df50fbc8c81b764482bf10 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 21 Apr 2026 15:30:44 +0530 Subject: [PATCH 06/74] code organisation --- crates/dst/src/bugbase.rs | 80 +- crates/dst/src/datastore.rs | 484 ------------ crates/dst/src/lib.rs | 48 +- crates/dst/src/runner.rs | 13 + crates/dst/src/seed.rs | 8 + crates/dst/src/shrink.rs | 123 +-- crates/dst/src/sim/mod.rs | 4 + crates/dst/src/{ => sim}/scheduler.rs | 10 + crates/dst/src/{ => sim}/sync.rs | 8 + crates/dst/src/subsystem.rs | 15 + .../datastore.rs} | 714 ++++++++++++++---- crates/dst/src/targets/mod.rs | 3 + crates/dst/src/trace.rs | 8 + 13 files changed, 731 insertions(+), 787 deletions(-) delete mode 100644 crates/dst/src/datastore.rs create mode 100644 crates/dst/src/sim/mod.rs rename crates/dst/src/{ => sim}/scheduler.rs (86%) rename crates/dst/src/{ => sim}/sync.rs (88%) rename crates/dst/src/{datastore_sim.rs => targets/datastore.rs} (56%) create mode 100644 crates/dst/src/targets/mod.rs diff --git a/crates/dst/src/bugbase.rs b/crates/dst/src/bugbase.rs index b5823ec1596..f045c9fde13 100644 --- 
a/crates/dst/src/bugbase.rs +++ b/crates/dst/src/bugbase.rs @@ -1,81 +1,27 @@ -use std::{fs, path::Path}; +//! Generic persistence helpers for failure artifacts. -use serde::{Deserialize, Serialize}; +use std::{fs, path::Path}; -use crate::datastore_sim::{DatastoreExecutionFailure, DatastoreSimulatorCase}; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; +/// Generic persisted failure artifact for one deterministic run. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct DatastoreBugArtifact { +pub struct BugArtifact { pub seed: u64, - pub failure: DatastoreExecutionFailure, - pub case: DatastoreSimulatorCase, - pub shrunk_case: Option, + pub failure: F, + pub case: C, + pub shrunk_case: Option, } -pub fn save_bug_artifact(path: impl AsRef, artifact: &DatastoreBugArtifact) -> anyhow::Result<()> { - let body = serde_json::to_string_pretty(artifact)?; +/// Writes any serializable value to disk as pretty JSON. +pub fn save_json(path: impl AsRef, value: &T) -> anyhow::Result<()> { + let body = serde_json::to_string_pretty(value)?; fs::write(path, body)?; Ok(()) } -pub fn load_bug_artifact(path: impl AsRef) -> anyhow::Result { +/// Loads any JSON value written by [`save_json`]. +pub fn load_json(path: impl AsRef) -> anyhow::Result { let body = fs::read_to_string(path)?; Ok(serde_json::from_str(&body)?) 
} - -#[cfg(test)] -mod tests { - use tempfile::tempdir; - - use crate::{ - bugbase::{load_bug_artifact, save_bug_artifact, DatastoreBugArtifact}, - datastore_sim::{ - run_case_detailed, ColumnKind, ColumnPlan, DatastoreSimulatorCase, Interaction, SchemaPlan, SimRow, - SimValue, TablePlan, - }, - seed::DstSeed, - }; - - #[test] - fn bug_artifact_roundtrips() { - let dir = tempdir().expect("create tempdir"); - let path = dir.path().join("bug.json"); - let case = DatastoreSimulatorCase { - seed: DstSeed(5), - num_connections: 1, - schema: SchemaPlan { - tables: vec![TablePlan { - name: "bugs".into(), - columns: vec![ - ColumnPlan { - name: "id".into(), - kind: ColumnKind::U64, - }, - ColumnPlan { - name: "ok".into(), - kind: ColumnKind::Bool, - }, - ], - secondary_index_col: Some(1), - }], - }, - interactions: vec![Interaction::AssertVisibleFresh { - table: 0, - row: SimRow { - values: vec![SimValue::U64(7), SimValue::Bool(true)], - }, - }], - }; - let failure = run_case_detailed(&case).expect_err("case should fail"); - let artifact = DatastoreBugArtifact { - seed: case.seed.0, - failure, - case: case.clone(), - shrunk_case: Some(case), - }; - - save_bug_artifact(&path, &artifact).expect("save artifact"); - let loaded = load_bug_artifact(&path).expect("load artifact"); - assert_eq!(loaded, artifact); - } -} diff --git a/crates/dst/src/datastore.rs b/crates/dst/src/datastore.rs deleted file mode 100644 index ae3d423d0af..00000000000 --- a/crates/dst/src/datastore.rs +++ /dev/null @@ -1,484 +0,0 @@ -use std::{sync::mpsc, thread}; - -/// TODO: make this stream-based. 
-/// -use spacetimedb_datastore::{ - execution_context::Workload, - locking_tx_datastore::{ - datastore::Locking, - lock_trace::{install_lock_event_hook, LockEvent, LockEventKind}, - MutTxId, - }, - traits::{IsolationLevel, MutTx, MutTxDatastore, Tx}, -}; -use spacetimedb_execution::Datastore as _; -use spacetimedb_lib::{ - db::auth::{StAccess, StTableType}, - Identity, -}; -use spacetimedb_primitives::TableId; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; -use spacetimedb_schema::{ - schema::{ColumnSchema, TableSchema}, - table_name::TableName, -}; -use spacetimedb_table::page_pool::PagePool; - -use crate::{ - seed::{DstRng, DstSeed}, - subsystem::{assert_invariants, DstSubsystem, Invariant, RunRecord}, - trace::Trace, -}; - -pub fn bootstrap_datastore() -> spacetimedb_datastore::Result { - Locking::bootstrap(Identity::ZERO, PagePool::new_for_test()) -} - -pub fn basic_table_schema(name: &str) -> TableSchema { - TableSchema::new( - TableId::SENTINEL, - TableName::for_test(name), - None, - vec![ - ColumnSchema::for_test(0, "id", AlgebraicType::U64), - ColumnSchema::for_test(1, "name", AlgebraicType::String), - ], - vec![], - vec![], - vec![], - StTableType::User, - StAccess::Public, - None, - None, - false, - None, - ) -} - -pub fn create_table(datastore: &Locking, schema: TableSchema) -> spacetimedb_datastore::Result { - let mut tx = datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); - let table_id = datastore.create_table_mut_tx(&mut tx, schema)?; - datastore.commit_mut_tx(tx)?; - Ok(table_id) -} - -pub fn insert_row(datastore: &Locking, table_id: TableId, id: u64, name: &str) -> spacetimedb_datastore::Result<()> { - let row = ProductValue::from_iter([AlgebraicValue::U64(id), AlgebraicValue::String(name.into())]); - let bytes = spacetimedb_sats::bsatn::to_vec(&row).map_err(anyhow::Error::from)?; - let mut tx = datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); - datastore.insert_mut_tx(&mut tx, 
table_id, &bytes)?; - datastore.commit_mut_tx(tx)?; - Ok(()) -} - -pub fn observe_lock_events(hook: F, body: impl FnOnce() -> R) -> R -where - F: Fn(LockEvent) + Send + Sync + 'static, -{ - let _guard = install_lock_event_hook(hook); - body() -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct DatastoreCase { - pub seed: DstSeed, - pub baseline: BaselinePlan, - pub hold_reader_during_writer_start: bool, -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct DatastoreOutcome { - pub baseline_row_count: u64, - pub final_row_count: u64, -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct BaselinePlan { - pub schema: SchemaPlan, - pub setup: Vec, -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct SchemaPlan { - pub table_name: String, -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct SetupTxn { - pub ops: Vec, -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum SetupOp { - Insert { id: u64, name: String }, - DeleteExisting { id: u64, name: String }, -} - -#[derive(Clone, Debug, Default)] -struct BaselineModel { - existing_rows: Vec<(u64, String)>, - next_id: u64, -} - -pub struct DatastoreSubsystem; - -impl DstSubsystem for DatastoreSubsystem { - type Case = DatastoreCase; - type Event = LockEvent; - type Outcome = DatastoreOutcome; - - fn name() -> &'static str { - "datastore" - } - - fn generate_case(seed: DstSeed) -> Self::Case { - let mut rng = seed.fork(1).rng(); - DatastoreCase { - seed, - baseline: generate_baseline_plan(&mut rng), - hold_reader_during_writer_start: true, - } - } - - fn run_case(case: &Self::Case) -> anyhow::Result> { - let datastore = bootstrap_datastore()?; - let table_id = apply_baseline_plan(&datastore, &case.baseline)?; - - let (tx, rx) = mpsc::channel::(); - - let trace_events = observe_lock_events( - move |event| { - tx.send(event).expect("send lock event"); - }, - || -> anyhow::Result> { - let read_tx = case - .hold_reader_during_writer_start - .then(|| datastore.begin_tx(Workload::ForTests)); - let 
datastore_for_writer = datastore.clone(); - - let writer = thread::spawn(move || { - let write_tx = datastore_for_writer.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); - let _ = datastore_for_writer.rollback_mut_tx(write_tx); - }); - - let mut events = Vec::new(); - while !events - .iter() - .any(|event: &LockEvent| event.kind == LockEventKind::BeginWriteRequested) - { - events.push(rx.recv()?); - } - - if let Some(read_tx) = read_tx { - drop(read_tx); - while !events - .iter() - .any(|event: &LockEvent| event.kind == LockEventKind::BeginWriteAcquired) - { - events.push(rx.recv()?); - } - } - - writer.join().expect("writer join"); - Ok(events) - }, - )?; - let trace_events = normalize_lock_events(trace_events); - - let baseline_row_count = datastore.begin_tx(Workload::ForTests).row_count(table_id); - let final_row_count = datastore.begin_tx(Workload::ForTests).row_count(table_id); - - let artifact = RunRecord { - subsystem: Self::name(), - seed: case.seed, - case: case.clone(), - trace: Some(Trace::from_events(trace_events)), - outcome: DatastoreOutcome { - baseline_row_count, - final_row_count, - }, - }; - - assert_invariants( - &artifact, - &[ - &BaselineMatchesPlan, - &WriterWaitsForReader, - &RollbackPreservesBaseline, - &ReplayableOutcome, - ], - )?; - - Ok(artifact) - } -} - -fn generate_baseline_plan(rng: &mut DstRng) -> BaselinePlan { - let mut model = BaselineModel { - existing_rows: Vec::new(), - next_id: 1, - }; - let tx_count = rng.index(5) + 1; - let mut txns = Vec::with_capacity(tx_count); - - for _ in 0..tx_count { - let op_count = rng.index(3) + 1; - let mut ops = Vec::with_capacity(op_count); - - for _ in 0..op_count { - let op = choose_setup_op(rng, &mut model); - apply_op_to_model(&mut model, &op); - ops.push(op); - } - - txns.push(SetupTxn { ops }); - } - - BaselinePlan { - schema: SchemaPlan { - table_name: format!("dst_case_{}", rng.next_u64() % 10_000), - }, - setup: txns, - } -} - -fn choose_setup_op(rng: &mut DstRng, model: 
&mut BaselineModel) -> SetupOp { - let can_delete = !model.existing_rows.is_empty(); - let choose_insert = !can_delete || rng.index(100) < 70; - - if choose_insert { - let id = model.next_id; - SetupOp::Insert { - id, - name: format!("row_{}", rng.next_u64() % 1000), - } - } else { - let idx = rng.index(model.existing_rows.len()); - let (id, name) = &model.existing_rows[idx]; - SetupOp::DeleteExisting { - id: *id, - name: name.clone(), - } - } -} - -fn apply_op_to_model(model: &mut BaselineModel, op: &SetupOp) { - match op { - SetupOp::Insert { id, name } => { - model.existing_rows.push((*id, name.clone())); - model.next_id = model.next_id.max(id + 1); - } - SetupOp::DeleteExisting { id, .. } => { - if let Some(pos) = model - .existing_rows - .iter() - .position(|(existing_id, _)| existing_id == id) - { - model.existing_rows.remove(pos); - } - } - } -} - -fn apply_baseline_plan(datastore: &Locking, plan: &BaselinePlan) -> anyhow::Result { - let table_id = create_table(datastore, basic_table_schema(&plan.schema.table_name))?; - - for txn in &plan.setup { - let mut tx = datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); - for op in &txn.ops { - apply_setup_op(datastore, &mut tx, table_id, op)?; - } - datastore.commit_mut_tx(tx)?; - } - - Ok(table_id) -} - -fn apply_setup_op(datastore: &Locking, tx: &mut MutTxId, table_id: TableId, op: &SetupOp) -> anyhow::Result<()> { - match op { - SetupOp::Insert { id, name } => { - let row = ProductValue::from_iter([AlgebraicValue::U64(*id), AlgebraicValue::String(name.clone().into())]); - let bytes = spacetimedb_sats::bsatn::to_vec(&row)?; - datastore.insert_mut_tx(tx, table_id, &bytes)?; - } - SetupOp::DeleteExisting { id, name } => { - let row = ProductValue::from_iter([AlgebraicValue::U64(*id), AlgebraicValue::String(name.clone().into())]); - let _ = datastore.delete_by_rel_mut_tx(tx, table_id, [row]); - } - } - Ok(()) -} - -struct WriterWaitsForReader; - -impl Invariant> for WriterWaitsForReader { - fn 
name(&self) -> &'static str { - "writer-waits-for-reader" - } - - fn check(&self, run: &RunRecord) -> anyhow::Result<()> { - if !run.case.hold_reader_during_writer_start { - return Ok(()); - } - - let trace = run - .trace - .as_ref() - .ok_or_else(|| anyhow::anyhow!("missing diagnostic trace"))?; - let write_requested = trace - .as_slice() - .iter() - .position(|event| event.event.kind == LockEventKind::BeginWriteRequested) - .ok_or_else(|| anyhow::anyhow!("missing write request event"))?; - let write_acquired = trace - .as_slice() - .iter() - .position(|event| event.event.kind == LockEventKind::BeginWriteAcquired) - .ok_or_else(|| anyhow::anyhow!("missing write acquired event"))?; - - if write_acquired <= write_requested { - anyhow::bail!("writer acquired before request ordering was established"); - } - Ok(()) - } -} - -struct RollbackPreservesBaseline; - -impl Invariant> for RollbackPreservesBaseline { - fn name(&self) -> &'static str { - "rollback-preserves-baseline" - } - - fn check(&self, run: &RunRecord) -> anyhow::Result<()> { - if run.outcome.baseline_row_count != run.outcome.final_row_count { - anyhow::bail!( - "rollback changed row count: baseline={} final={}", - run.outcome.baseline_row_count, - run.outcome.final_row_count - ); - } - Ok(()) - } -} - -struct ReplayableOutcome; - -impl Invariant> for ReplayableOutcome { - fn name(&self) -> &'static str { - "trace-has-events" - } - - fn check(&self, run: &RunRecord) -> anyhow::Result<()> { - if run.trace.as_ref().is_none_or(|trace| trace.as_slice().is_empty()) { - anyhow::bail!("trace is empty"); - } - Ok(()) - } -} - -struct BaselineMatchesPlan; - -impl Invariant> for BaselineMatchesPlan { - fn name(&self) -> &'static str { - "baseline-matches-plan" - } - - fn check(&self, run: &RunRecord) -> anyhow::Result<()> { - let expected = expected_baseline_rows(&run.case.baseline).len() as u64; - if run.outcome.baseline_row_count != expected { - anyhow::bail!( - "baseline row count mismatch: expected={} actual={}", 
- expected, - run.outcome.baseline_row_count - ); - } - Ok(()) - } -} - -fn expected_baseline_rows(plan: &BaselinePlan) -> Vec<(u64, String)> { - let mut model = BaselineModel::default(); - for txn in &plan.setup { - for op in &txn.ops { - apply_op_to_model(&mut model, op); - } - } - model.existing_rows -} - -fn normalize_lock_events(events: Vec) -> Vec { - let mut normalized = Vec::with_capacity(events.len()); - for event in events { - let duplicate = normalized - .last() - .is_some_and(|prev: &LockEvent| prev.kind == event.kind); - if !duplicate { - normalized.push(event); - } - } - normalized -} - -#[cfg(test)] -mod tests { - use std::sync::{Mutex, OnceLock}; - - use pretty_assertions::assert_eq; - use proptest::prelude::*; - - use crate::{ - runner::{rerun_case, run_generated, verify_repeatable_execution}, - seed::DstSeed, - }; - - use super::DatastoreSubsystem; - - fn test_lock() -> &'static Mutex<()> { - static LOCK: OnceLock> = OnceLock::new(); - LOCK.get_or_init(|| Mutex::new(())) - } - - #[test] - fn datastore_writer_waits_for_reader() { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - let artifact = run_generated::(DstSeed(1)).expect("run datastore dst case"); - assert_eq!(artifact.outcome.baseline_row_count, artifact.outcome.final_row_count); - } - - #[test] - fn rerun_reproduces_case_trace_and_outcome() { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - let artifact = run_generated::(DstSeed(9)).expect("run datastore dst case"); - let replayed = rerun_case::(&artifact).expect("rerun datastore dst case"); - assert_eq!(artifact.case, replayed.case); - assert_eq!(artifact.outcome, replayed.outcome); - assert!(artifact - .trace - .as_ref() - .is_some_and(|trace| !trace.as_slice().is_empty())); - assert!(replayed - .trace - .as_ref() - .is_some_and(|trace| !trace.as_slice().is_empty())); - } - - #[test] - fn observed_trace_verifies_repeatable_execution() { - let _guard = 
test_lock().lock().unwrap_or_else(|err| err.into_inner()); - let artifact = run_generated::(DstSeed(11)).expect("run datastore dst case"); - let replayed = - verify_repeatable_execution::(&artifact).expect("verify repeatable execution"); - assert_eq!(artifact.trace, replayed.trace); - assert_eq!(artifact.outcome, replayed.outcome); - } - - proptest! { - #[test] - fn datastore_property_holds_across_generated_seeds(seed in any::()) { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - run_generated::(DstSeed(seed)) - .unwrap_or_else(|err| panic!("seed {seed} failed: {err}")); - } - } -} diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index 81d4d95fdac..b8764c6b4e8 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -1,10 +1,48 @@ +//! Deterministic simulation testing utilities for SpacetimeDB crates. +//! +//! Layout: +//! +//! - Root: harness pieces such as [`seed`], [`trace`], [`subsystem`], and +//! [`runner`]. +//! - Root generic helpers: [`bugbase`] and [`shrink`]. +//! - [`sim`]: reusable simulator primitives such as [`scheduler`] and [`sync`]. +//! - [`targets`]: concrete simulation targets such as [`datastore_sim`]. +//! +//! Reading guide: +//! +//! - Start with [`subsystem`] to understand the common `Case -> Trace -> +//! Outcome` shape used across simulations. +//! - Then read [`runner`] for the small orchestration helpers that generate, +//! run, and replay a case. +//! - Read [`sim`] for reusable simulation building blocks. +//! - For the datastore simulator itself, read [`datastore_sim`] top-down: +//! case format, generator, executor, then the expected-state model used by +//! the final consistency check. +//! - [`bugbase`] and [`shrink`] are the debugging path after a failure. +//! +//! The crate is intentionally a library crate. It exposes reusable pieces for +//! tests and future binaries rather than providing a CLI directly. + +/// Generic persisted failure artifacts and JSON helpers. 
pub mod bugbase; -pub mod datastore; -pub mod datastore_sim; +/// Small helpers for generating, running, rerunning, and replay-checking cases. pub mod runner; -pub mod scheduler; +/// Stable seed and RNG utilities used to make runs reproducible. pub mod seed; -pub mod shrink; +/// Common traits and result types shared by DST subsystems. pub mod subsystem; -pub mod sync; +/// Trace data structures used to record deterministic execution. pub mod trace; +/// Generic shrinking helpers. +pub mod shrink; +/// Reusable simulation primitives. +pub mod sim; +/// Concrete simulator targets. +pub mod targets; + +/// Higher-level randomized datastore simulator with schema and interaction plans. +pub use targets::datastore as datastore_sim; +/// Generic actor scheduler used by deterministic simulations. +pub use sim::scheduler; +/// Small in-memory synchronization model used by scheduler-oriented tests. +pub use sim::sync; diff --git a/crates/dst/src/runner.rs b/crates/dst/src/runner.rs index 8b2aba32ec1..35b962f3504 100644 --- a/crates/dst/src/runner.rs +++ b/crates/dst/src/runner.rs @@ -1,3 +1,12 @@ +//! Minimal orchestration helpers for deterministic subsystems. +//! +//! These helpers intentionally stay thin: +//! +//! - generate a case from a seed, +//! - run it, +//! - rerun the exact same case, +//! - compare trace and outcome for replayability. + use crate::{ scheduler::{Actor, ScheduleMode, Scheduler}, seed::DstSeed, @@ -5,21 +14,25 @@ use crate::{ trace::Trace, }; +/// Runs generic actors under the seeded scheduler and returns the trace. pub fn run_seeded(actors: Vec, seed: DstSeed) -> Trace { Scheduler::new(actors, ScheduleMode::Seeded, Some(seed.rng())).run_to_completion() } +/// Generates a case from `seed` and executes it once. pub fn run_generated(seed: DstSeed) -> anyhow::Result> { let case = S::generate_case(seed); S::run_case(&case) } +/// Re-executes the exact case stored in a previous run record. 
pub fn rerun_case( record: &RunRecord, ) -> anyhow::Result> { S::run_case(&record.case) } +/// Re-executes a run and checks that both trace and outcome match. pub fn verify_repeatable_execution( record: &RunRecord, ) -> anyhow::Result> { diff --git a/crates/dst/src/seed.rs b/crates/dst/src/seed.rs index 4bf9aa53c39..b30ad77f102 100644 --- a/crates/dst/src/seed.rs +++ b/crates/dst/src/seed.rs @@ -1,5 +1,12 @@ +//! Stable seed and RNG utilities used across DST runs. +//! +//! The important property here is repeatability, not statistical quality. +//! `DstSeed::fork` is used to derive independent substreams without requiring +//! callers to manually coordinate RNG state. + use serde::{Deserialize, Serialize}; +/// Top-level seed value for a deterministic run. #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)] pub struct DstSeed(pub u64); @@ -16,6 +23,7 @@ impl DstSeed { } } +/// Small deterministic RNG for simulator code. #[derive(Clone, Debug)] pub struct DstRng { state: u64, diff --git a/crates/dst/src/shrink.rs b/crates/dst/src/shrink.rs index 64122c4a8e1..302f43cd172 100644 --- a/crates/dst/src/shrink.rs +++ b/crates/dst/src/shrink.rs @@ -1,21 +1,31 @@ -use crate::datastore_sim::{failure_reason, DatastoreExecutionFailure, DatastoreSimulatorCase, Interaction}; - -pub fn shrink_failure( - case: &DatastoreSimulatorCase, - failure: &DatastoreExecutionFailure, -) -> anyhow::Result { - let mut shrunk = case.clone(); - shrunk.interactions.truncate(failure.step_index.saturating_add(1)); - let target_reason = failure.reason.clone(); +//! Generic shrinking helpers for deterministic targets. + +/// Generic remove-and-replay shrink loop. 
+pub fn shrink_by_removing( + case: &C, + target_failure: &F, + truncate: impl Fn(&C) -> C, + len: impl Fn(&C) -> usize, + remove: impl Fn(&C, usize) -> Option, + replay_failure: impl Fn(&C) -> anyhow::Result, + same_failure: impl Fn(&F, &F) -> bool, +) -> anyhow::Result +where + C: Clone, +{ + let mut shrunk = truncate(case); let mut changed = true; while changed { changed = false; - for idx in (0..shrunk.interactions.len()).rev() { - let Some(candidate) = remove_interaction(&shrunk, idx) else { + for idx in (0..len(&shrunk)).rev() { + let Some(candidate) = remove(&shrunk, idx) else { + continue; + }; + let Ok(candidate_failure) = replay_failure(&candidate) else { continue; }; - if failure_reason(&candidate).ok().as_ref() == Some(&target_reason) { + if same_failure(target_failure, &candidate_failure) { shrunk = candidate; changed = true; } @@ -24,92 +34,3 @@ pub fn shrink_failure( Ok(shrunk) } - -fn remove_interaction(case: &DatastoreSimulatorCase, idx: usize) -> Option { - let interaction = case.interactions.get(idx)?; - if matches!( - interaction, - Interaction::CommitTx { .. } | Interaction::RollbackTx { .. 
} - ) { - return None; - } - - let mut interactions = case.interactions.clone(); - interactions.remove(idx); - Some(DatastoreSimulatorCase { - seed: case.seed, - num_connections: case.num_connections, - schema: case.schema.clone(), - interactions, - }) -} - -#[cfg(test)] -mod tests { - use crate::{ - datastore_sim::{ - run_case_detailed, ColumnKind, ColumnPlan, DatastoreSimulatorCase, Interaction, SchemaPlan, SimRow, - SimValue, TablePlan, - }, - seed::DstSeed, - shrink::shrink_failure, - }; - - #[test] - fn shrink_drops_trailing_noise() { - let case = DatastoreSimulatorCase { - seed: DstSeed(77), - num_connections: 1, - schema: SchemaPlan { - tables: vec![TablePlan { - name: "bugs".into(), - columns: vec![ - ColumnPlan { - name: "id".into(), - kind: ColumnKind::U64, - }, - ColumnPlan { - name: "name".into(), - kind: ColumnKind::String, - }, - ], - secondary_index_col: Some(1), - }], - }, - interactions: vec![ - Interaction::Insert { - conn: 0, - table: 0, - row: SimRow { - values: vec![SimValue::U64(1), SimValue::String("one".into())], - }, - }, - Interaction::AssertVisibleFresh { - table: 0, - row: SimRow { - values: vec![SimValue::U64(1), SimValue::String("one".into())], - }, - }, - Interaction::AssertMissingFresh { - table: 0, - row: SimRow { - values: vec![SimValue::U64(1), SimValue::String("one".into())], - }, - }, - Interaction::Insert { - conn: 0, - table: 0, - row: SimRow { - values: vec![SimValue::U64(2), SimValue::String("two".into())], - }, - }, - ], - }; - - let failure = run_case_detailed(&case).expect_err("case should fail"); - let shrunk = shrink_failure(&case, &failure).expect("shrink failure"); - assert!(shrunk.interactions.len() < case.interactions.len()); - let shrunk_failure = run_case_detailed(&shrunk).expect_err("shrunk case should still fail"); - assert_eq!(shrunk_failure.reason, failure.reason); - } -} diff --git a/crates/dst/src/sim/mod.rs b/crates/dst/src/sim/mod.rs new file mode 100644 index 00000000000..b276cf1baef --- /dev/null +++ 
b/crates/dst/src/sim/mod.rs @@ -0,0 +1,4 @@ +//! Reusable simulator primitives. + +pub mod scheduler; +pub mod sync; diff --git a/crates/dst/src/scheduler.rs b/crates/dst/src/sim/scheduler.rs similarity index 86% rename from crates/dst/src/scheduler.rs rename to crates/dst/src/sim/scheduler.rs index c2c5ac7d56c..8f009df2fff 100644 --- a/crates/dst/src/scheduler.rs +++ b/crates/dst/src/sim/scheduler.rs @@ -1,5 +1,12 @@ +//! Generic scheduler for actor-style deterministic simulations. +//! +//! The scheduler is deliberately small. It repeatedly selects a runnable actor, +//! lets it emit events into the trace, and stops once every actor reports that +//! it is complete. + use crate::{seed::DstRng, trace::Trace}; +/// Result of asking an actor to make one step of progress. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum StepState { Progressed, @@ -7,6 +14,7 @@ pub enum StepState { Complete, } +/// Minimal interface for something the scheduler can drive. pub trait Actor { type Event: Clone; @@ -14,12 +22,14 @@ pub trait Actor { fn is_complete(&self) -> bool; } +/// Policy for choosing the next runnable actor. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ScheduleMode { RoundRobin, Seeded, } +/// Deterministic actor scheduler with either round-robin or seeded selection. pub struct Scheduler { actors: Vec, cursor: usize, diff --git a/crates/dst/src/sync.rs b/crates/dst/src/sim/sync.rs similarity index 88% rename from crates/dst/src/sync.rs rename to crates/dst/src/sim/sync.rs index d8f63ee2cea..f247e787bcb 100644 --- a/crates/dst/src/sync.rs +++ b/crates/dst/src/sim/sync.rs @@ -1,5 +1,11 @@ +//! Tiny synchronization primitives for deterministic tests. +//! +//! This file models only the behavior needed by crate tests; it is not trying +//! to be a full synchronization library. + use std::collections::VecDeque; +/// Lock lifecycle events emitted by [`SimRwLock`]. 
#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum LockEventKind { ReadRequested, @@ -10,12 +16,14 @@ pub enum LockEventKind { WriteReleased, } +/// One simulated lock event tagged with the actor that caused it. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct LockEvent { pub actor_id: usize, pub kind: LockEventKind, } +/// Minimal FIFO read/write lock model used in deterministic tests. #[derive(Clone, Debug, Default)] pub struct SimRwLock { readers: usize, diff --git a/crates/dst/src/subsystem.rs b/crates/dst/src/subsystem.rs index 221db8a6209..299802f74f8 100644 --- a/crates/dst/src/subsystem.rs +++ b/crates/dst/src/subsystem.rs @@ -1,5 +1,17 @@ +//! Shared traits for deterministic simulation subsystems. +//! +//! A subsystem defines: +//! +//! - a generated `Case`, +//! - a stream of traced `Event`s, +//! - a final `Outcome`. +//! +//! `RunRecord` packages those pieces together so replay checks and invariants +//! can reason about one run without knowing subsystem-specific details. + use crate::{seed::DstSeed, trace::Trace}; +/// A deterministic simulation subsystem. pub trait DstSubsystem { type Case: Clone + core::fmt::Debug + Eq + PartialEq; type Event: Clone + core::fmt::Debug + Eq + PartialEq; @@ -10,6 +22,7 @@ pub trait DstSubsystem { fn run_case(case: &Self::Case) -> anyhow::Result>; } +/// Result of one fully executed deterministic run. #[derive(Clone, Debug, Eq, PartialEq)] pub struct RunRecord { pub subsystem: &'static str, @@ -19,11 +32,13 @@ pub struct RunRecord { pub outcome: O, } +/// Post-run assertion over a run record. pub trait Invariant { fn name(&self) -> &'static str; fn check(&self, run: &R) -> anyhow::Result<()>; } +/// Runs each invariant and annotates failures with the invariant name. 
pub fn assert_invariants(run: &R, invariants: &[&dyn Invariant]) -> anyhow::Result<()> { for invariant in invariants { invariant diff --git a/crates/dst/src/datastore_sim.rs b/crates/dst/src/targets/datastore.rs similarity index 56% rename from crates/dst/src/datastore_sim.rs rename to crates/dst/src/targets/datastore.rs index a3d412560eb..25bd321e022 100644 --- a/crates/dst/src/datastore_sim.rs +++ b/crates/dst/src/targets/datastore.rs @@ -1,4 +1,26 @@ -use std::{collections::BTreeSet, fs, path::Path}; +//! Randomized datastore simulator target. +//! +//! This is the highest-level subsystem in the crate: +//! +//! - generate a schema, +//! - generate a deterministic interaction stream or plan, +//! - execute the plan against a real datastore instance, +//! - compare the final committed datastore state against an in-memory model. +//! +//! The file is large, so it is easiest to read in this order: +//! +//! 1. case and interaction types, +//! 2. `generate_case` and `InteractionStream`, +//! 3. `run_case_detailed` / `run_generated_stream`, +//! 4. `execute_interaction`, +//! 5. `GenerationModel`, +//! 6. `ExpectedModel`. 
+ +use std::{ + collections::{BTreeSet, VecDeque}, + fs, + path::Path, +}; use serde::{Deserialize, Serialize}; use spacetimedb_datastore::{ @@ -7,7 +29,10 @@ use spacetimedb_datastore::{ traits::{IsolationLevel, MutTx, MutTxDatastore, Tx}, }; use spacetimedb_execution::Datastore as _; -use spacetimedb_lib::db::auth::{StAccess, StTableType}; +use spacetimedb_lib::{ + db::auth::{StAccess, StTableType}, + Identity, +}; use spacetimedb_primitives::TableId; use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; use spacetimedb_schema::{ @@ -15,14 +40,17 @@ use spacetimedb_schema::{ schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, table_name::TableName, }; +use spacetimedb_table::page_pool::PagePool; use crate::{ - datastore::bootstrap_datastore, + bugbase::{load_json, save_json, BugArtifact}, seed::{DstRng, DstSeed}, + shrink::shrink_by_removing, subsystem::{DstSubsystem, RunRecord}, trace::Trace, }; +/// Full input for one randomized datastore simulator run. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct DatastoreSimulatorCase { pub seed: DstSeed, @@ -31,11 +59,13 @@ pub struct DatastoreSimulatorCase { pub interactions: Vec, } +/// Generated schema for one simulator case. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct SchemaPlan { pub tables: Vec, } +/// Table definition used by the simulator. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct TablePlan { pub name: String, @@ -43,12 +73,14 @@ pub struct TablePlan { pub secondary_index_col: Option, } +/// Column definition used by the simulator. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct ColumnPlan { pub name: String, pub kind: ColumnKind, } +/// Small set of column kinds currently supported by the simulator. 
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub enum ColumnKind { U64, @@ -56,11 +88,13 @@ pub enum ColumnKind { Bool, } +/// Serializable row representation used by generated interactions. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct SimRow { pub values: Vec, } +/// Serializable cell value used by generated interactions. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub enum SimValue { U64(u64), @@ -68,6 +102,10 @@ pub enum SimValue { Bool(bool), } +/// One generated simulator step. +/// +/// The plan intentionally mixes mutations with immediate assertions so failures +/// are attributed to the first step that violates an invariant. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub enum Interaction { BeginTx { conn: usize }, @@ -82,17 +120,20 @@ pub enum Interaction { AssertRowCountFresh { table: usize, expected: u64 }, } +/// Trace event for the datastore simulator. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub enum DatastoreSimulatorEvent { Executed(Interaction), } +/// Final state collected from the datastore after the run. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct DatastoreSimulatorOutcome { pub final_row_counts: Vec, pub final_rows: Vec>, } +/// Rich failure returned by `run_case_detailed`. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct DatastoreExecutionFailure { pub step_index: usize, @@ -100,6 +141,9 @@ pub struct DatastoreExecutionFailure { pub interaction: Interaction, } +pub type DatastoreBugArtifact = BugArtifact; + +/// DST subsystem wrapper around the randomized datastore simulator. pub struct DatastoreSimulatorSubsystem; impl DstSubsystem for DatastoreSimulatorSubsystem { @@ -126,11 +170,13 @@ impl DstSubsystem for DatastoreSimulatorSubsystem { } } +/// Generates a deterministic simulator case from a seed. 
pub fn generate_case(seed: DstSeed) -> DatastoreSimulatorCase { let mut rng = seed.fork(17).rng(); let num_connections = rng.index(3) + 1; let schema = generate_schema(&mut rng); - let interactions = generate_interactions(seed, &schema, num_connections, &mut rng); + let interactions = + InteractionStream::new(seed, schema.clone(), num_connections, default_target_ops(&mut rng)).collect(); DatastoreSimulatorCase { seed, num_connections, @@ -139,38 +185,92 @@ pub fn generate_case(seed: DstSeed) -> DatastoreSimulatorCase { } } +/// Executes a generated case and returns either a full run record or the first +/// failing interaction. pub fn run_case_detailed( case: &DatastoreSimulatorCase, ) -> Result< RunRecord, DatastoreExecutionFailure, +> { + run_interactions( + case.seed, + case.schema.clone(), + case.num_connections, + case.interactions.iter().cloned(), + Some(case.clone()), + ) +} + +/// Executes a generated simulator workload without first materializing all +/// interactions in memory. 
+pub fn run_generated_stream(seed: DstSeed, max_interactions: usize) -> anyhow::Result { + let mut rng = seed.fork(17).rng(); + let num_connections = rng.index(3) + 1; + let schema = generate_schema(&mut rng); + let stream = InteractionStream::new(seed, schema.clone(), num_connections, max_interactions); + let datastore = bootstrap_datastore()?; + let table_ids = install_schema(&datastore, &schema)?; + let mut execution = ExecutionState::new(num_connections); + let mut expected = ExpectedModel::new(table_ids.len(), num_connections); + + for (step_index, interaction) in stream.enumerate() { + execute_interaction(&datastore, &table_ids, &mut execution, &interaction).map_err(|reason| { + anyhow::anyhow!("datastore simulator failed at step {step_index}: {reason}") + })?; + expected.apply(&interaction); + } + + execution.rollback_all(&datastore); + + let outcome = collect_outcome(&datastore, &table_ids)?; + let expected_rows = expected.committed_rows(); + if outcome.final_rows != expected_rows { + anyhow::bail!( + "final datastore state mismatch: expected={expected_rows:?} actual={:?}", + outcome.final_rows + ); + } + + Ok(outcome) +} + +fn run_interactions( + seed: DstSeed, + schema: SchemaPlan, + num_connections: usize, + interactions: impl IntoIterator, + case_override: Option, +) -> Result< + RunRecord, + DatastoreExecutionFailure, > { let datastore = bootstrap_datastore().map_err(|err| failure_without_step(format!("bootstrap failed: {err}")))?; - let table_ids = install_schema(&datastore, &case.schema) + let table_ids = install_schema(&datastore, &schema) .map_err(|err| failure_without_step(format!("schema install failed: {err}")))?; let mut trace = Trace::default(); - let mut connections: Vec> = (0..case.num_connections).map(|_| None).collect(); + let mut execution = ExecutionState::new(num_connections); + let mut expected = ExpectedModel::new(table_ids.len(), num_connections); + let mut executed_interactions = Vec::new(); - for (step_index, interaction) in 
case.interactions.iter().cloned().enumerate() { + for (step_index, interaction) in interactions.into_iter().enumerate() { trace.push(DatastoreSimulatorEvent::Executed(interaction.clone())); - execute_interaction(&datastore, &table_ids, &mut connections, &interaction).map_err(|reason| { + execute_interaction(&datastore, &table_ids, &mut execution, &interaction).map_err(|reason| { DatastoreExecutionFailure { step_index, reason, - interaction, + interaction: interaction.clone(), } })?; + expected.apply(&interaction); + executed_interactions.push(interaction); } - for tx in &mut connections { - if let Some(tx) = tx.take() { - let _ = datastore.rollback_mut_tx(tx); - } - } + execution.rollback_all(&datastore); let outcome = collect_outcome(&datastore, &table_ids) .map_err(|err| failure_without_step(format!("collect outcome failed: {err}")))?; - let expected_rows = expected_committed_rows(case); + let expected_rows = expected.committed_rows(); if outcome.final_rows != expected_rows { return Err(failure_without_step(format!( "final datastore state mismatch: expected={expected_rows:?} actual={:?}", @@ -178,26 +278,36 @@ pub fn run_case_detailed( ))); } + let case = case_override.unwrap_or(DatastoreSimulatorCase { + seed, + num_connections, + schema, + interactions: executed_interactions, + }); + Ok(RunRecord { subsystem: DatastoreSimulatorSubsystem::name(), - seed: case.seed, - case: case.clone(), + seed, + case, trace: Some(trace), outcome, }) } +/// Saves a simulator case as JSON for replay or debugging. pub fn save_case(path: impl AsRef, case: &DatastoreSimulatorCase) -> anyhow::Result<()> { let body = serde_json::to_string_pretty(case)?; fs::write(path, body)?; Ok(()) } +/// Loads a simulator case previously written by [`save_case`]. pub fn load_case(path: impl AsRef) -> anyhow::Result { let body = fs::read_to_string(path)?; Ok(serde_json::from_str(&body)?) } +/// Runs a case and extracts only the failure reason. 
pub fn failure_reason(case: &DatastoreSimulatorCase) -> anyhow::Result { match run_case_detailed(case) { Ok(_) => anyhow::bail!("case did not fail"), @@ -205,6 +315,55 @@ pub fn failure_reason(case: &DatastoreSimulatorCase) -> anyhow::Result { } } +pub fn save_bug_artifact(path: impl AsRef, artifact: &DatastoreBugArtifact) -> anyhow::Result<()> { + save_json(path, artifact) +} + +pub fn load_bug_artifact(path: impl AsRef) -> anyhow::Result { + load_json(path) +} + +pub fn shrink_failure( + case: &DatastoreSimulatorCase, + failure: &DatastoreExecutionFailure, +) -> anyhow::Result { + shrink_by_removing( + case, + failure, + |case| { + let mut shrunk = case.clone(); + shrunk.interactions.truncate(failure.step_index.saturating_add(1)); + shrunk + }, + |case| case.interactions.len(), + remove_interaction, + |case| match run_case_detailed(case) { + Ok(_) => anyhow::bail!("case did not fail"), + Err(failure) => Ok(failure), + }, + |expected, candidate| expected.reason == candidate.reason, + ) +} + +fn remove_interaction(case: &DatastoreSimulatorCase, idx: usize) -> Option { + let interaction = case.interactions.get(idx)?; + if matches!( + interaction, + Interaction::CommitTx { .. } | Interaction::RollbackTx { .. 
} + ) { + return None; + } + + let mut interactions = case.interactions.clone(); + interactions.remove(idx); + Some(DatastoreSimulatorCase { + seed: case.seed, + num_connections: case.num_connections, + schema: case.schema.clone(), + interactions, + }) +} + fn generate_schema(rng: &mut DstRng) -> SchemaPlan { let table_count = rng.index(3) + 1; let mut tables = Vec::with_capacity(table_count); @@ -236,83 +395,12 @@ fn generate_schema(rng: &mut DstRng) -> SchemaPlan { SchemaPlan { tables } } -fn generate_interactions( - seed: DstSeed, - schema: &SchemaPlan, - num_connections: usize, - rng: &mut DstRng, -) -> Vec { - let mut plan = Vec::new(); - let mut model = GenerationModel::new(schema, num_connections, seed); - let target_ops = 24 + rng.index(24); - - while plan.len() < target_ops { - let conn = model.open_tx_conn().unwrap_or_else(|| rng.index(num_connections)); - - if !model.connections[conn].in_tx && model.open_tx_conn().is_none() && rng.index(100) < 20 { - model.connections[conn].in_tx = true; - plan.push(Interaction::BeginTx { conn }); - continue; - } - - if model.connections[conn].in_tx && rng.index(100) < 15 { - let followups = model.commit(conn); - plan.push(Interaction::CommitTx { conn }); - plan.extend(followups); - continue; - } - - if model.connections[conn].in_tx && rng.index(100) < 10 { - let followups = model.rollback(conn); - plan.push(Interaction::RollbackTx { conn }); - plan.extend(followups); - continue; - } - - let table = rng.index(schema.tables.len()); - let visible_rows = model.visible_rows(conn, table); - let choose_insert = visible_rows.is_empty() || rng.index(100) < 65; - if choose_insert { - let row = model.make_row(rng, table); - model.insert(conn, table, row.clone()); - plan.push(Interaction::Insert { - conn, - table, - row: row.clone(), - }); - plan.push(Interaction::AssertVisibleInConnection { conn, table, row }); - if !model.connections[conn].in_tx { - let row = model.last_inserted_row(conn).expect("tracked auto-commit insert"); - 
plan.push(Interaction::AssertVisibleFresh { table, row }); - } - } else { - let row = visible_rows[rng.index(visible_rows.len())].clone(); - model.delete(conn, table, row.clone()); - plan.push(Interaction::Delete { - conn, - table, - row: row.clone(), - }); - plan.push(Interaction::AssertMissingInConnection { - conn, - table, - row: row.clone(), - }); - if !model.connections[conn].in_tx { - plan.push(Interaction::AssertMissingFresh { table, row }); - } - } - } - - for conn in 0..num_connections { - if model.connections[conn].in_tx { - let followups = model.commit(conn); - plan.push(Interaction::CommitTx { conn }); - plan.extend(followups); - } - } +fn default_target_ops(rng: &mut DstRng) -> usize { + 24 + rng.index(24) +} - plan +fn bootstrap_datastore() -> spacetimedb_datastore::Result { + Locking::bootstrap(Identity::ZERO, PagePool::new_for_test()) } fn install_schema(datastore: &Locking, schema: &SchemaPlan) -> anyhow::Result> { @@ -370,42 +458,47 @@ fn install_schema(datastore: &Locking, schema: &SchemaPlan) -> anyhow::Result], + execution: &mut ExecutionState, interaction: &Interaction, ) -> Result<(), String> { match interaction { Interaction::BeginTx { conn } => { - let slot = connections - .get_mut(*conn) - .ok_or_else(|| format!("connection {conn} out of range"))?; - if slot.is_some() { + execution.ensure_known_connection(*conn)?; + if execution.tx_by_connection[*conn].is_some() { return Err(format!("connection {conn} already has open transaction")); } - *slot = Some(datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests)); + if let Some(owner) = execution.active_writer { + return Err(format!( + "connection {conn} cannot begin write transaction while connection {owner} owns lock" + )); + } + execution.tx_by_connection[*conn] = + Some(datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests)); + execution.active_writer = Some(*conn); } Interaction::CommitTx { conn } => { - let tx = connections - .get_mut(*conn) - 
.ok_or_else(|| format!("connection {conn} out of range"))? + execution.ensure_writer_owner(*conn, "commit")?; + let tx = execution.tx_by_connection[*conn] .take() .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; datastore .commit_mut_tx(tx) .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; + execution.active_writer = None; } Interaction::RollbackTx { conn } => { - let tx = connections - .get_mut(*conn) - .ok_or_else(|| format!("connection {conn} out of range"))? + execution.ensure_writer_owner(*conn, "rollback")?; + let tx = execution.tx_by_connection[*conn] .take() .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; let _ = datastore.rollback_mut_tx(tx); + execution.active_writer = None; } Interaction::Insert { conn, table, row } => { with_mut_tx( datastore, table_ids, - connections, + execution, *conn, *table, |datastore, table_id, tx| { @@ -421,7 +514,7 @@ fn execute_interaction( with_mut_tx( datastore, table_ids, - connections, + execution, *conn, *table, |datastore, table_id, tx| { @@ -438,7 +531,7 @@ fn execute_interaction( .get(*table) .ok_or_else(|| format!("table {table} out of range"))?; let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = if let Some(Some(tx)) = connections.get(*conn) { + let found = if let Some(Some(tx)) = execution.tx_by_connection.get(*conn) { datastore .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) .map_err(|err| format!("in-tx lookup failed: {err}"))? @@ -457,7 +550,7 @@ fn execute_interaction( .get(*table) .ok_or_else(|| format!("table {table} out of range"))?; let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = if let Some(Some(tx)) = connections.get(*conn) { + let found = if let Some(Some(tx)) = execution.tx_by_connection.get(*conn) { datastore .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) .map_err(|err| format!("in-tx lookup failed: {err}"))? 
@@ -511,7 +604,7 @@ fn execute_interaction( fn with_mut_tx( datastore: &Locking, table_ids: &[TableId], - connections: &mut [Option], + execution: &mut ExecutionState, conn: usize, table: usize, mut f: impl FnMut(&Locking, TableId, &mut MutTxId) -> Result<(), String>, @@ -519,18 +612,24 @@ fn with_mut_tx( let table_id = *table_ids .get(table) .ok_or_else(|| format!("table {table} out of range"))?; - let slot = connections - .get_mut(conn) - .ok_or_else(|| format!("connection {conn} out of range"))?; + execution.ensure_known_connection(conn)?; + let slot = &mut execution.tx_by_connection[conn]; match slot { Some(tx) => f(datastore, table_id, tx), None => { + if let Some(owner) = execution.active_writer { + return Err(format!( + "connection {conn} cannot auto-commit write while connection {owner} owns lock" + )); + } let mut tx = datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + execution.active_writer = Some(conn); f(datastore, table_id, &mut tx)?; datastore .commit_mut_tx(tx) .map_err(|err| format!("auto-commit failed on connection {conn}: {err}"))?; + execution.active_writer = None; Ok(()) } } @@ -565,18 +664,6 @@ fn collect_outcome(datastore: &Locking, table_ids: &[TableId]) -> anyhow::Result }) } -fn expected_committed_rows(case: &DatastoreSimulatorCase) -> Vec> { - let mut model = ExpectedModel::new(case.schema.tables.len(), case.num_connections); - for interaction in &case.interactions { - model.apply(interaction); - } - let mut rows = model.committed; - for table_rows in &mut rows { - table_rows.sort_by_key(|row| row.id().unwrap_or_default()); - } - rows -} - fn failure_without_step(reason: String) -> DatastoreExecutionFailure { DatastoreExecutionFailure { step_index: usize::MAX, @@ -640,12 +727,177 @@ impl SimRow { } } +struct ExecutionState { + tx_by_connection: Vec>, + active_writer: Option, +} + +impl ExecutionState { + fn new(connection_count: usize) -> Self { + Self { + tx_by_connection: (0..connection_count).map(|_| 
None).collect(), + active_writer: None, + } + } + + fn ensure_known_connection(&self, conn: usize) -> Result<(), String> { + self.tx_by_connection + .get(conn) + .map(|_| ()) + .ok_or_else(|| format!("connection {conn} out of range")) + } + + fn ensure_writer_owner(&self, conn: usize, action: &str) -> Result<(), String> { + self.ensure_known_connection(conn)?; + match self.active_writer { + Some(owner) if owner == conn => Ok(()), + Some(owner) => Err(format!( + "connection {conn} cannot {action} while connection {owner} owns lock" + )), + None => Err(format!("connection {conn} has no transaction to {action}")), + } + } + + fn rollback_all(&mut self, datastore: &Locking) { + for tx in &mut self.tx_by_connection { + if let Some(tx) = tx.take() { + let _ = datastore.rollback_mut_tx(tx); + } + } + self.active_writer = None; + } +} + +#[derive(Clone, Debug)] +struct InteractionStream { + rng: DstRng, + model: GenerationModel, + num_connections: usize, + target_interactions: usize, + emitted: usize, + finalize_conn: usize, + pending: VecDeque, + finished: bool, +} + +impl InteractionStream { + fn new(seed: DstSeed, schema: SchemaPlan, num_connections: usize, target_interactions: usize) -> Self { + Self { + rng: seed.fork(17).rng(), + model: GenerationModel::new(&schema, num_connections, seed), + num_connections, + target_interactions, + emitted: 0, + finalize_conn: 0, + pending: VecDeque::new(), + finished: false, + } + } + + fn fill_pending(&mut self) { + if self.emitted >= self.target_interactions { + while self.finalize_conn < self.num_connections { + let conn = self.finalize_conn; + self.finalize_conn += 1; + if self.model.connections[conn].in_tx { + let followups = self.model.commit(conn); + self.pending.push_back(Interaction::CommitTx { conn }); + self.pending.extend(followups); + return; + } + } + self.finished = true; + return; + } + + let conn = self + .model + .active_writer() + .unwrap_or_else(|| self.rng.index(self.num_connections)); + + if 
!self.model.connections[conn].in_tx && self.model.active_writer().is_none() && self.rng.index(100) < 20 { + self.model.begin_tx(conn); + self.pending.push_back(Interaction::BeginTx { conn }); + return; + } + + if self.model.connections[conn].in_tx && self.rng.index(100) < 15 { + let followups = self.model.commit(conn); + self.pending.push_back(Interaction::CommitTx { conn }); + self.pending.extend(followups); + return; + } + + if self.model.connections[conn].in_tx && self.rng.index(100) < 10 { + let followups = self.model.rollback(conn); + self.pending.push_back(Interaction::RollbackTx { conn }); + self.pending.extend(followups); + return; + } + + let table = self.rng.index(self.model.schema.tables.len()); + let visible_rows = self.model.visible_rows(conn, table); + let choose_insert = visible_rows.is_empty() || self.rng.index(100) < 65; + if choose_insert { + let row = self.model.make_row(&mut self.rng, table); + self.model.insert(conn, table, row.clone()); + self.pending.push_back(Interaction::Insert { + conn, + table, + row: row.clone(), + }); + self.pending.push_back(Interaction::AssertVisibleInConnection { conn, table, row }); + if !self.model.connections[conn].in_tx { + let row = self.model.last_inserted_row(conn).expect("tracked auto-commit insert"); + self.pending.push_back(Interaction::AssertVisibleFresh { table, row }); + } + return; + } + + let row = visible_rows[self.rng.index(visible_rows.len())].clone(); + self.model.delete(conn, table, row.clone()); + self.pending.push_back(Interaction::Delete { + conn, + table, + row: row.clone(), + }); + self.pending.push_back(Interaction::AssertMissingInConnection { + conn, + table, + row: row.clone(), + }); + if !self.model.connections[conn].in_tx { + self.pending.push_back(Interaction::AssertMissingFresh { table, row }); + } + } +} + +impl Iterator for InteractionStream { + type Item = Interaction; + + fn next(&mut self) -> Option { + loop { + if let Some(interaction) = self.pending.pop_front() { + self.emitted 
+= 1; + return Some(interaction); + } + + if self.finished { + return None; + } + + self.fill_pending(); + } + } +} + #[derive(Clone, Debug)] struct GenerationModel { schema: SchemaPlan, connections: Vec, committed: Vec>, next_ids: Vec, + active_writer: Option, } #[derive(Clone, Debug, Default)] @@ -665,6 +917,7 @@ impl GenerationModel { next_ids: (0..schema.tables.len()) .map(|idx| seed.fork(idx as u64 + 100).0) .collect(), + active_writer: None, } } @@ -699,8 +952,16 @@ impl GenerationModel { rows } - fn open_tx_conn(&self) -> Option { - self.connections.iter().position(|conn| conn.in_tx) + fn active_writer(&self) -> Option { + self.active_writer + } + + fn begin_tx(&mut self, conn: usize) { + assert!(self.active_writer.is_none(), "single writer already active"); + let pending = &mut self.connections[conn]; + assert!(!pending.in_tx, "connection already in transaction"); + pending.in_tx = true; + self.active_writer = Some(conn); } fn insert(&mut self, conn: usize, table: usize, row: SimRow) { @@ -734,6 +995,7 @@ impl GenerationModel { let inserts = std::mem::take(&mut pending.staged_inserts); let deletes = std::mem::take(&mut pending.staged_deletes); pending.in_tx = false; + self.active_writer = None; for (table, row) in &deletes { self.committed[*table].retain(|candidate| candidate != row); @@ -763,6 +1025,7 @@ impl GenerationModel { pending.staged_inserts.clear(); pending.staged_deletes.clear(); pending.in_tx = false; + self.active_writer = None; touched_tables .into_iter() .map(|table| Interaction::AssertRowCountFresh { @@ -777,6 +1040,7 @@ impl GenerationModel { struct ExpectedModel { committed: Vec>, connections: Vec, + active_writer: Option, } #[derive(Clone, Debug, Default)] @@ -791,13 +1055,19 @@ impl ExpectedModel { Self { committed: vec![Vec::new(); table_count], connections: vec![ExpectedConnection::default(); connection_count], + active_writer: None, } } fn apply(&mut self, interaction: &Interaction) { match interaction { - Interaction::BeginTx { conn 
} => self.connections[*conn].in_tx = true, + Interaction::BeginTx { conn } => { + assert!(self.active_writer.is_none(), "multiple concurrent writers in expected model"); + self.connections[*conn].in_tx = true; + self.active_writer = Some(*conn); + } Interaction::CommitTx { conn } => { + assert_eq!(self.active_writer, Some(*conn), "commit by non-owner in expected model"); let state = &mut self.connections[*conn]; for (table, row) in state.staged_deletes.drain(..) { self.committed[table].retain(|candidate| *candidate != row); @@ -806,12 +1076,15 @@ impl ExpectedModel { self.committed[table].push(row); } state.in_tx = false; + self.active_writer = None; } Interaction::RollbackTx { conn } => { + assert_eq!(self.active_writer, Some(*conn), "rollback by non-owner in expected model"); let state = &mut self.connections[*conn]; state.staged_inserts.clear(); state.staged_deletes.clear(); state.in_tx = false; + self.active_writer = None; } Interaction::Insert { conn, table, row } => { let state = &mut self.connections[*conn]; @@ -839,6 +1112,13 @@ impl ExpectedModel { | Interaction::AssertRowCountFresh { .. 
} => {} } } + + fn committed_rows(mut self) -> Vec> { + for table_rows in &mut self.committed { + table_rows.sort_by_key(|row| row.id().unwrap_or_default()); + } + self.committed + } } #[cfg(test)] @@ -847,6 +1127,7 @@ mod tests { use pretty_assertions::assert_eq; use proptest::prelude::*; + use tempfile::tempdir; use crate::{ runner::{rerun_case, run_generated, verify_repeatable_execution}, @@ -854,8 +1135,9 @@ mod tests { }; use super::{ - failure_reason, run_case_detailed, ColumnKind, ColumnPlan, DatastoreSimulatorCase, DatastoreSimulatorSubsystem, - Interaction, SchemaPlan, SimRow, SimValue, TablePlan, + failure_reason, generate_case, load_bug_artifact, run_case_detailed, run_generated_stream, save_bug_artifact, + shrink_failure, ColumnKind, ColumnPlan, DatastoreBugArtifact, DatastoreSimulatorCase, + DatastoreSimulatorSubsystem, Interaction, SchemaPlan, SimRow, SimValue, TablePlan, }; fn test_lock() -> &'static Mutex<()> { @@ -902,6 +1184,178 @@ mod tests { } } + #[test] + fn streamed_runner_supports_long_cases() { + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); + run_generated_stream(DstSeed(1234), 10_000).expect("run long streamed datastore simulator case"); + } + + #[test] + fn generated_cases_keep_single_writer_lock() { + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); + let case = generate_case(DstSeed(4242)); + let mut owner = None; + + for interaction in case.interactions { + match interaction { + Interaction::BeginTx { conn } => { + assert_eq!(owner, None, "second writer opened before first closed"); + owner = Some(conn); + } + Interaction::CommitTx { conn } | Interaction::RollbackTx { conn } => { + assert_eq!(owner, Some(conn), "non-owner closed writer"); + owner = None; + } + Interaction::Insert { conn, .. } + | Interaction::Delete { conn, .. } + | Interaction::AssertVisibleInConnection { conn, .. } + | Interaction::AssertMissingInConnection { conn, .. 
} => { + if let Some(writer) = owner { + assert_eq!(conn, writer, "interaction ran on non-owner while writer open"); + } + } + Interaction::AssertVisibleFresh { .. } + | Interaction::AssertMissingFresh { .. } + | Interaction::AssertRowCountFresh { .. } => {} + } + } + + assert_eq!(owner, None, "writer left open at end of generated case"); + } + + #[test] + fn second_writer_fails_fast() { + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); + let case = DatastoreSimulatorCase { + seed: DstSeed(88), + num_connections: 2, + schema: SchemaPlan { + tables: vec![TablePlan { + name: "locks".into(), + columns: vec![ + ColumnPlan { + name: "id".into(), + kind: ColumnKind::U64, + }, + ColumnPlan { + name: "name".into(), + kind: ColumnKind::String, + }, + ], + secondary_index_col: Some(1), + }], + }, + interactions: vec![Interaction::BeginTx { conn: 0 }, Interaction::BeginTx { conn: 1 }], + }; + + let failure = run_case_detailed(&case).expect_err("second writer should fail"); + assert_eq!(failure.step_index, 1); + assert!(failure.reason.contains("owns lock")); + } + + #[test] + fn bug_artifact_roundtrips() { + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); + let dir = tempdir().expect("create tempdir"); + let path = dir.path().join("bug.json"); + let case = DatastoreSimulatorCase { + seed: DstSeed(5), + num_connections: 1, + schema: SchemaPlan { + tables: vec![TablePlan { + name: "bugs".into(), + columns: vec![ + ColumnPlan { + name: "id".into(), + kind: ColumnKind::U64, + }, + ColumnPlan { + name: "ok".into(), + kind: ColumnKind::Bool, + }, + ], + secondary_index_col: Some(1), + }], + }, + interactions: vec![Interaction::AssertVisibleFresh { + table: 0, + row: SimRow { + values: vec![SimValue::U64(7), SimValue::Bool(true)], + }, + }], + }; + let failure = run_case_detailed(&case).expect_err("case should fail"); + let artifact = DatastoreBugArtifact { + seed: case.seed.0, + failure, + case: case.clone(), + shrunk_case: Some(case), 
+ }; + + save_bug_artifact(&path, &artifact).expect("save artifact"); + let loaded = load_bug_artifact(&path).expect("load artifact"); + assert_eq!(loaded, artifact); + } + + #[test] + fn shrink_drops_trailing_noise() { + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); + let case = DatastoreSimulatorCase { + seed: DstSeed(77), + num_connections: 1, + schema: SchemaPlan { + tables: vec![TablePlan { + name: "bugs".into(), + columns: vec![ + ColumnPlan { + name: "id".into(), + kind: ColumnKind::U64, + }, + ColumnPlan { + name: "name".into(), + kind: ColumnKind::String, + }, + ], + secondary_index_col: Some(1), + }], + }, + interactions: vec![ + Interaction::Insert { + conn: 0, + table: 0, + row: SimRow { + values: vec![SimValue::U64(1), SimValue::String("one".into())], + }, + }, + Interaction::AssertVisibleFresh { + table: 0, + row: SimRow { + values: vec![SimValue::U64(1), SimValue::String("one".into())], + }, + }, + Interaction::AssertMissingFresh { + table: 0, + row: SimRow { + values: vec![SimValue::U64(1), SimValue::String("one".into())], + }, + }, + Interaction::Insert { + conn: 0, + table: 0, + row: SimRow { + values: vec![SimValue::U64(2), SimValue::String("two".into())], + }, + }, + ], + }; + + let failure = run_case_detailed(&case).expect_err("case should fail"); + let shrunk = shrink_failure(&case, &failure).expect("shrink failure"); + assert!(shrunk.interactions.len() < case.interactions.len()); + let shrunk_failure = run_case_detailed(&shrunk).expect_err("shrunk case should still fail"); + assert_eq!(shrunk_failure.reason, failure.reason); + } + fn failing_case() -> DatastoreSimulatorCase { DatastoreSimulatorCase { seed: DstSeed(99), diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs new file mode 100644 index 00000000000..3dac4e35aae --- /dev/null +++ b/crates/dst/src/targets/mod.rs @@ -0,0 +1,3 @@ +//! Concrete simulation targets. 
+ +pub mod datastore; diff --git a/crates/dst/src/trace.rs b/crates/dst/src/trace.rs index 7dbb9f6f83c..ccb146b6774 100644 --- a/crates/dst/src/trace.rs +++ b/crates/dst/src/trace.rs @@ -1,3 +1,10 @@ +//! Trace representation for deterministic runs. +//! +//! Each event gets a monotonically increasing `step_id`. Additional metadata is +//! optional so simple simulations can use plain event streams while richer +//! schedulers can attach logical time, actor ids, or resource ids. + +/// One event plus optional metadata captured during execution. #[derive(Clone, Debug, Eq, PartialEq)] pub struct StampedEvent { pub step_id: u64, @@ -7,6 +14,7 @@ pub struct StampedEvent { pub event: E, } +/// Ordered event log for one deterministic run. #[derive(Clone, Debug, Eq, PartialEq)] pub struct Trace { events: Vec>, From f52ef9e112d83c8fd4329d6ff7fe8e9cb7c544a2 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 22 Apr 2026 13:55:13 +0530 Subject: [PATCH 07/74] local mocks --- crates/dst/Cargo.toml | 8 + crates/dst/README.md | 109 ++ crates/dst/src/config.rs | 61 + crates/dst/src/lib.rs | 35 +- crates/dst/src/main.rs | 277 ++++ crates/dst/src/schema.rs | 131 ++ crates/dst/src/subsystem.rs | 5 + crates/dst/src/targets/datastore.rs | 1380 +++++------------ crates/dst/src/targets/harness.rs | 140 ++ crates/dst/src/targets/mod.rs | 2 + crates/dst/src/targets/relational_db.rs | 532 +++++++ crates/dst/src/trace.rs | 5 + crates/dst/src/workload/mod.rs | 3 + .../dst/src/workload/table_ops/generation.rs | 172 ++ crates/dst/src/workload/table_ops/mod.rs | 17 + crates/dst/src/workload/table_ops/model.rs | 253 +++ .../dst/src/workload/table_ops/properties.rs | 35 + crates/dst/src/workload/table_ops/runner.rs | 118 ++ .../workload/table_ops/scenarios/banking.rs | 160 ++ .../src/workload/table_ops/scenarios/mod.rs | 110 ++ .../table_ops/scenarios/random_crud.rs | 88 ++ crates/dst/src/workload/table_ops/types.rs | 114 ++ 22 files changed, 2740 insertions(+), 1015 deletions(-) create mode 
100644 crates/dst/README.md create mode 100644 crates/dst/src/config.rs create mode 100644 crates/dst/src/main.rs create mode 100644 crates/dst/src/schema.rs create mode 100644 crates/dst/src/targets/harness.rs create mode 100644 crates/dst/src/targets/relational_db.rs create mode 100644 crates/dst/src/workload/mod.rs create mode 100644 crates/dst/src/workload/table_ops/generation.rs create mode 100644 crates/dst/src/workload/table_ops/mod.rs create mode 100644 crates/dst/src/workload/table_ops/model.rs create mode 100644 crates/dst/src/workload/table_ops/properties.rs create mode 100644 crates/dst/src/workload/table_ops/runner.rs create mode 100644 crates/dst/src/workload/table_ops/scenarios/banking.rs create mode 100644 crates/dst/src/workload/table_ops/scenarios/mod.rs create mode 100644 crates/dst/src/workload/table_ops/scenarios/random_crud.rs create mode 100644 crates/dst/src/workload/table_ops/types.rs diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index 779afa081fa..6f6592bfb76 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -6,11 +6,19 @@ license-file = "LICENSE" description = "Deterministic simulation testing utilities for SpacetimeDB crates" rust-version.workspace = true +[[bin]] +name = "spacetimedb-dst" +path = "src/main.rs" +bench = false + [dependencies] anyhow.workspace = true +clap.workspace = true serde.workspace = true serde_json.workspace = true spacetimedb-datastore = { workspace = true, features = ["test"] } +spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.1.0" } +spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.1.0" } spacetimedb-execution.workspace = true spacetimedb-lib.workspace = true spacetimedb-primitives.workspace = true diff --git a/crates/dst/README.md b/crates/dst/README.md new file mode 100644 index 00000000000..390ce478669 --- /dev/null +++ b/crates/dst/README.md @@ -0,0 +1,109 @@ +# `spacetimedb-dst` + 
+Deterministic simulation testing utilities for SpacetimeDB. + +## What Is In This Crate + +This crate contains reusable pieces for building deterministic simulations, +shared workload generators, and concrete DST targets. + +- root harness: + `seed.rs`, `trace.rs`, `subsystem.rs`, `runner.rs` +- root generic helpers: + `bugbase.rs`, `shrink.rs` +- root shared target helpers: + `config.rs`, `schema.rs` +- `workload/`: + shared table-style workload split into scenarios, generation, model, and + properties +- `sim/`: + reusable simulator primitives like `scheduler.rs` and `sync.rs` +- `targets/`: + `datastore.rs`, `relational_db.rs` +- binary: + `src/main.rs` + +## Reading Order + +If you are new to the crate, this order keeps the mental model small: + +1. `subsystem.rs` +2. `runner.rs` +3. `seed.rs` +4. `trace.rs` +5. `sim/scheduler.rs` +6. `config.rs` +7. `schema.rs` +8. `workload/table_ops/` +9. `bugbase.rs` +10. `shrink.rs` +11. `targets/datastore.rs` +12. `targets/relational_db.rs` + +## Core Model + +Most code in the crate revolves around the same shape: + +- `Case`: generated input for one deterministic run. +- `Trace`: ordered execution record. +- `Outcome`: final observable result. +- Invariants: assertions over the run record. + +That separation is intentional: + +- generation decides what to try, +- execution decides what happened, +- invariants decide whether the run is acceptable, +- shrinking tries to keep the failure while deleting unnecessary steps. + +## Shared Table Workload Map + +The main reusable DST workload now lives in `workload/table_ops/`: + +1. `types.rs` + common scenario, interaction, event, outcome, and engine traits +2. `properties.rs` + first-class properties such as visibility, row-count, and banking table + matching +3. `scenarios/` + scenario-specific schema generation like `random_crud` and `banking` +4. `model.rs` + generator model and expected-state model +5. 
`generation.rs` + `InteractionStream` and scenario-aware workload planning +6. `runner.rs` + generic execute/run helpers shared by multiple targets + +Concrete targets like `targets/datastore.rs` and `targets/relational_db.rs` +reuse that workload and swap in target-specific engines. + +## Failure Flow + +For a failing target case: + +1. `run_case_detailed` returns `DatastoreExecutionFailure` +2. root `bugbase.rs` can serialize failure plus original case +3. root `shrink.rs` truncates after failure and tries removing interactions + while preserving the same failure reason + +## CLI + +Long DST runs are intended to be driven from CLI, not from `#[test]`. + +Core commands: + +```bash +cargo run -p spacetimedb-dst -- run --target datastore --scenario banking --duration 5m +cargo run -p spacetimedb-dst -- run --target relational-db --seed 42 --max-interactions 2000 +cargo run -p spacetimedb-dst -- replay --target datastore bug.json +cargo run -p spacetimedb-dst -- shrink --target datastore bug.json +``` + +Library unit tests remain for deterministic helpers, shrinking, and small +target correctness checks. Scenario soak runs should go through CLI. + +## Current Scope + +This crate provides deterministic replay primitives, shared table workload +generation, two concrete targets (`datastore` and `relational_db`), and a +small CLI for seeded or duration-bounded runs. diff --git a/crates/dst/src/config.rs b/crates/dst/src/config.rs new file mode 100644 index 00000000000..5147bc90803 --- /dev/null +++ b/crates/dst/src/config.rs @@ -0,0 +1,61 @@ +//! Shared run-budget configuration for DST targets. + +use std::time::{Duration, Instant}; + +use serde::{Deserialize, Serialize}; + +/// Common stop conditions for generated DST runs. +#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] +pub struct RunConfig { + /// Hard cap on generated interactions. `None` means no interaction budget. 
+ pub max_interactions: Option, + /// Wall-clock duration budget in milliseconds. `None` means no time budget. + pub max_duration_ms: Option, +} + +impl RunConfig { + pub fn with_max_interactions(max_interactions: usize) -> Self { + Self { + max_interactions: Some(max_interactions), + max_duration_ms: None, + } + } + + pub fn with_duration_spec(duration: &str) -> anyhow::Result { + Ok(Self { + max_interactions: None, + max_duration_ms: Some(parse_duration_spec(duration)?.as_millis() as u64), + }) + } + + pub fn deadline(&self) -> Option { + self.max_duration_ms + .map(Duration::from_millis) + .map(|duration| Instant::now() + duration) + } + + pub fn max_interactions_or_default(&self, default: usize) -> usize { + self.max_interactions.unwrap_or(default) + } +} + +pub fn parse_duration_spec(spec: &str) -> anyhow::Result { + let spec = spec.trim(); + if spec.is_empty() { + anyhow::bail!("duration spec cannot be empty"); + } + + let split_at = spec + .find(|ch: char| !ch.is_ascii_digit()) + .ok_or_else(|| anyhow::anyhow!("duration spec missing unit: {spec}"))?; + let (digits, unit) = spec.split_at(split_at); + let value: u64 = digits.parse()?; + + match unit { + "ms" => Ok(Duration::from_millis(value)), + "s" => Ok(Duration::from_secs(value)), + "m" => Ok(Duration::from_secs(value.saturating_mul(60))), + "h" => Ok(Duration::from_secs(value.saturating_mul(60 * 60))), + _ => anyhow::bail!("unsupported duration unit: {unit}"), + } +} diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index b8764c6b4e8..dde53f7efb0 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -4,9 +4,12 @@ //! //! - Root: harness pieces such as [`seed`], [`trace`], [`subsystem`], and //! [`runner`]. +//! - Root shared target helpers: [`config`] and [`schema`]. //! - Root generic helpers: [`bugbase`] and [`shrink`]. //! - [`sim`]: reusable simulator primitives such as [`scheduler`] and [`sync`]. -//! - [`targets`]: concrete simulation targets such as [`datastore_sim`]. +//! 
- [`workload`]: shared workload/model/property generation reused by targets. +//! - [`targets`]: concrete simulation targets such as [`datastore_sim`] and +//! `relational_db`. //! //! Reading guide: //! @@ -15,34 +18,42 @@ //! - Then read [`runner`] for the small orchestration helpers that generate, //! run, and replay a case. //! - Read [`sim`] for reusable simulation building blocks. -//! - For the datastore simulator itself, read [`datastore_sim`] top-down: -//! case format, generator, executor, then the expected-state model used by -//! the final consistency check. +//! - Read [`workload`] for shared table-workload planning split into +//! scenarios, generation, model, and properties. +//! - Then read the concrete targets in [`targets`]. +//! - [`config`] and [`schema`] hold reusable target-side data shapes. //! - [`bugbase`] and [`shrink`] are the debugging path after a failure. //! -//! The crate is intentionally a library crate. It exposes reusable pieces for -//! tests and future binaries rather than providing a CLI directly. +//! The crate is primarily a library crate, but long-running DST workloads are +//! intended to be driven through the `spacetimedb-dst` binary via `run`, `replay`, and +//! `shrink` commands. /// Generic persisted failure artifacts and JSON helpers. pub mod bugbase; +/// Shared run-budget configuration for DST targets. +pub mod config; /// Small helpers for generating, running, rerunning, and replay-checking cases. pub mod runner; +/// Shared schema and row model used by DST targets. +pub mod schema; /// Stable seed and RNG utilities used to make runs reproducible. pub mod seed; -/// Common traits and result types shared by DST subsystems. -pub mod subsystem; -/// Trace data structures used to record deterministic execution. -pub mod trace; /// Generic shrinking helpers. pub mod shrink; /// Reusable simulation primitives. pub mod sim; +/// Common traits and result types shared by DST subsystems. 
+pub mod subsystem; /// Concrete simulator targets. pub mod targets; +/// Trace data structures used to record deterministic execution. +pub mod trace; +/// Shared workload generators reused by multiple targets. +pub mod workload; -/// Higher-level randomized datastore simulator with schema and interaction plans. -pub use targets::datastore as datastore_sim; /// Generic actor scheduler used by deterministic simulations. pub use sim::scheduler; /// Small in-memory synchronization model used by scheduler-oriented tests. pub use sim::sync; +/// Higher-level randomized datastore simulator with schema and interaction plans. +pub use targets::datastore as datastore_sim; diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs new file mode 100644 index 00000000000..e6ef2abb392 --- /dev/null +++ b/crates/dst/src/main.rs @@ -0,0 +1,277 @@ +use std::{ + path::{Path, PathBuf}, + time::{SystemTime, UNIX_EPOCH}, +}; + +use clap::{Args, Parser, Subcommand, ValueEnum}; +use spacetimedb_dst::{ + config::RunConfig, + seed::DstSeed, + targets::{datastore, relational_db}, + workload::table_ops::TableScenarioId, +}; + +#[derive(Parser, Debug)] +#[command(name = "spacetimedb-dst")] +#[command(about = "Run deterministic simulation targets")] +struct Cli { + #[command(subcommand)] + command: Command, +} + +#[derive(Subcommand, Debug)] +enum Command { + Run(RunArgs), + Replay(ReplayArgs), + Shrink(ShrinkArgs), +} + +#[derive(Args, Debug, Clone)] +struct TargetArgs { + #[arg(long, value_enum, default_value_t = TargetKind::Datastore)] + target: TargetKind, + #[arg(long, value_enum, default_value_t = ScenarioKind::RandomCrud)] + scenario: ScenarioKind, +} + +#[derive(Args, Debug)] +struct RunArgs { + #[command(flatten)] + target: TargetArgs, + #[arg(long)] + seed: Option, + #[arg(long)] + duration: Option, + #[arg(long)] + max_interactions: Option, + #[arg(long)] + save_case: Option, +} + +#[derive(Args, Debug)] +struct ReplayArgs { + #[command(flatten)] + target: TargetArgs, + path: 
PathBuf, +} + +#[derive(Args, Debug)] +struct ShrinkArgs { + #[command(flatten)] + target: TargetArgs, + path: PathBuf, + #[arg(long)] + save_shrunk: Option, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] +enum TargetKind { + Datastore, + RelationalDb, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] +enum ScenarioKind { + RandomCrud, + Banking, +} + +impl From for TableScenarioId { + fn from(value: ScenarioKind) -> Self { + match value { + ScenarioKind::RandomCrud => TableScenarioId::RandomCrud, + ScenarioKind::Banking => TableScenarioId::Banking, + } + } +} + +fn main() -> anyhow::Result<()> { + match Cli::parse().command { + Command::Run(args) => run_command(args), + Command::Replay(args) => replay_command(args), + Command::Shrink(args) => shrink_command(args), + } +} + +fn run_command(args: RunArgs) -> anyhow::Result<()> { + let seed = resolve_seed(args.seed); + let config = build_config(args.duration.as_deref(), args.max_interactions)?; + let scenario = TableScenarioId::from(args.target.scenario); + + match args.target.target { + TargetKind::Datastore => run_datastore(seed, scenario, config, args.save_case), + TargetKind::RelationalDb => run_relational(seed, scenario, config, args.save_case), + } +} + +fn replay_command(args: ReplayArgs) -> anyhow::Result<()> { + match args.target.target { + TargetKind::Datastore => replay_datastore(&args.path), + TargetKind::RelationalDb => replay_relational(&args.path), + } +} + +fn shrink_command(args: ShrinkArgs) -> anyhow::Result<()> { + match args.target.target { + TargetKind::Datastore => shrink_datastore(&args.path, args.save_shrunk.as_ref()), + TargetKind::RelationalDb => shrink_relational(&args.path, args.save_shrunk.as_ref()), + } +} + +fn resolve_seed(seed: Option) -> DstSeed { + seed.map(DstSeed).unwrap_or_else(|| { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("time went backwards") + .as_nanos() as u64; + DstSeed(nanos) + }) +} + +fn build_config(duration: 
Option<&str>, max_interactions: Option) -> anyhow::Result { + match (duration, max_interactions) { + (Some(duration), Some(max_interactions)) => Ok(RunConfig { + max_interactions: Some(max_interactions), + max_duration_ms: Some(spacetimedb_dst::config::parse_duration_spec(duration)?.as_millis() as u64), + }), + (Some(duration), None) => RunConfig::with_duration_spec(duration), + (None, Some(max_interactions)) => Ok(RunConfig::with_max_interactions(max_interactions)), + (None, None) => Ok(RunConfig::with_max_interactions(1_000)), + } +} + +fn run_datastore( + seed: DstSeed, + scenario: TableScenarioId, + config: RunConfig, + save_case: Option, +) -> anyhow::Result<()> { + if config.max_duration_ms.is_some() { + if save_case.is_some() { + anyhow::bail!("duration-based streamed runs do not support save-case"); + } + let outcome = datastore::run_generated_with_config_and_scenario(seed, scenario, config)?; + println!( + "ok target=datastore seed={} tables={} row_counts={:?}", + seed.0, + outcome.final_rows.len(), + outcome.final_row_counts + ); + return Ok(()); + } + + let max_interactions = config.max_interactions.unwrap_or(1_000); + let case = datastore::materialize_case(seed, scenario, max_interactions); + if let Some(path) = &save_case { + datastore::save_case(path, &case)?; + println!("saved_case={}", path.display()); + } + replay_datastore_case(&case) +} + +fn run_relational( + seed: DstSeed, + scenario: TableScenarioId, + config: RunConfig, + save_case: Option, +) -> anyhow::Result<()> { + if config.max_duration_ms.is_some() { + if save_case.is_some() { + anyhow::bail!("duration-based streamed runs do not support save-case"); + } + let outcome = relational_db::run_generated_with_config_and_scenario(seed, scenario, config)?; + println!( + "ok target=relational_db seed={} tables={} row_counts={:?}", + seed.0, + outcome.final_rows.len(), + outcome.final_row_counts + ); + return Ok(()); + } + + let max_interactions = config.max_interactions.unwrap_or(1_000); + let 
case = relational_db::materialize_case(seed, scenario, max_interactions); + if let Some(path) = &save_case { + relational_db::save_case(path, &case)?; + println!("saved_case={}", path.display()); + } + replay_relational_case(&case) +} + +fn replay_datastore(path: &Path) -> anyhow::Result<()> { + let case = datastore::load_case(path)?; + replay_datastore_case(&case) +} + +fn replay_relational(path: &Path) -> anyhow::Result<()> { + let case = relational_db::load_case(path)?; + replay_relational_case(&case) +} + +fn replay_datastore_case(case: &datastore::DatastoreSimulatorCase) -> anyhow::Result<()> { + match datastore::run_case_detailed(case) { + Ok(record) => { + println!( + "ok target=datastore seed={} steps={}", + record.seed.0, + record.case.interactions.len() + ); + Ok(()) + } + Err(failure) => { + println!( + "fail target=datastore seed={} step={} reason={}", + case.seed.0, failure.step_index, failure.reason + ); + anyhow::bail!("datastore case failed") + } + } +} + +fn replay_relational_case(case: &relational_db::RelationalDbSimulatorCase) -> anyhow::Result<()> { + match relational_db::run_case_detailed(case) { + Ok(record) => { + println!( + "ok target=relational_db seed={} steps={}", + record.seed.0, + record.case.interactions.len() + ); + Ok(()) + } + Err(failure) => { + println!( + "fail target=relational_db seed={} step={} reason={}", + case.seed.0, failure.step_index, failure.reason + ); + anyhow::bail!("relational_db case failed") + } + } +} + +fn shrink_datastore(path: &Path, save_shrunk: Option<&PathBuf>) -> anyhow::Result<()> { + let case = datastore::load_case(path)?; + let failure = datastore::run_case_detailed(&case).expect_err("shrink needs failing datastore case"); + let shrunk = datastore::shrink_failure(&case, &failure)?; + let out = shrunk_path(path, save_shrunk); + datastore::save_case(&out, &shrunk)?; + println!("shrunk_case={}", out.display()); + Ok(()) +} + +fn shrink_relational(path: &Path, save_shrunk: Option<&PathBuf>) -> 
anyhow::Result<()> { + let case = relational_db::load_case(path)?; + let failure = relational_db::run_case_detailed(&case).expect_err("shrink needs failing relational_db case"); + let shrunk = relational_db::shrink_failure(&case, &failure)?; + let out = shrunk_path(path, save_shrunk); + relational_db::save_case(&out, &shrunk)?; + println!("shrunk_case={}", out.display()); + Ok(()) +} + +fn shrunk_path(default_input: &Path, explicit: Option<&PathBuf>) -> PathBuf { + explicit.cloned().unwrap_or_else(|| { + let mut path = default_input.as_os_str().to_os_string(); + path.push(".shrunk.json"); + PathBuf::from(path) + }) +} diff --git a/crates/dst/src/schema.rs b/crates/dst/src/schema.rs new file mode 100644 index 00000000000..11f189bc1d7 --- /dev/null +++ b/crates/dst/src/schema.rs @@ -0,0 +1,131 @@ +//! Shared schema and row model used by DST targets. + +use serde::{de::Deserializer, ser::Serializer, Deserialize, Serialize}; +use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; + +use crate::seed::DstRng; + +/// Generated schema for one simulator case. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct SchemaPlan { + /// User-visible tables installed before the workload starts. + pub tables: Vec, +} + +/// Table definition used by simulators. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct TablePlan { + /// Stable logical table name used in generated interactions and assertions. + pub name: String, + /// Ordered column definitions. Column 0 is treated as the primary id column. + pub columns: Vec, + /// Optional secondary indexed column used to exercise index installation paths. + pub secondary_index_col: Option, +} + +/// Column definition used by simulators. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct ColumnPlan { + /// Column name installed into the target schema. + pub name: String, + /// Algebraic type for generated values in this column. 
+ pub ty: AlgebraicType, +} + +/// Serializable row representation used by generated interactions. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SimRow { + /// Column values in schema order. + pub values: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +enum SerdeAlgebraicValue { + U64(u64), + String(String), + Bool(bool), +} + +pub fn generate_supported_type(rng: &mut DstRng) -> AlgebraicType { + match rng.index(3) { + 0 => AlgebraicType::U64, + 1 => AlgebraicType::String, + _ => AlgebraicType::Bool, + } +} + +pub fn generate_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { + match ty { + AlgebraicType::U64 => AlgebraicValue::U64((rng.next_u64() % 1000) + idx as u64), + AlgebraicType::String => AlgebraicValue::String(format!("v{}_{}", idx, rng.next_u64() % 10_000).into()), + AlgebraicType::Bool => AlgebraicValue::Bool(rng.index(2) == 0), + other => panic!("unsupported generated column type: {other:?}"), + } +} + +impl From<&AlgebraicValue> for SerdeAlgebraicValue { + fn from(value: &AlgebraicValue) -> Self { + match value { + AlgebraicValue::U64(value) => Self::U64(*value), + AlgebraicValue::String(value) => Self::String(value.to_string()), + AlgebraicValue::Bool(value) => Self::Bool(*value), + other => panic!("unsupported value in simulator row serde: {other:?}"), + } + } +} + +impl From for AlgebraicValue { + fn from(value: SerdeAlgebraicValue) -> Self { + match value { + SerdeAlgebraicValue::U64(value) => Self::U64(value), + SerdeAlgebraicValue::String(value) => Self::String(value.into()), + SerdeAlgebraicValue::Bool(value) => Self::Bool(value), + } + } +} + +impl Serialize for SimRow { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let values = self.values.iter().map(SerdeAlgebraicValue::from).collect::>(); + values.serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for SimRow { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + 
{ + let values = Vec::::deserialize(deserializer)? + .into_iter() + .map(AlgebraicValue::from) + .collect(); + Ok(Self { values }) + } +} + +impl SimRow { + pub fn to_product_value(&self) -> ProductValue { + ProductValue::from_iter(self.values.iter().cloned()) + } + + pub fn to_bsatn(&self) -> anyhow::Result> { + Ok(spacetimedb_sats::bsatn::to_vec(&self.to_product_value())?) + } + + pub fn from_product_value(value: ProductValue) -> Self { + SimRow { + values: value.elements.to_vec(), + } + } + + pub fn id(&self) -> Option { + match self.values.first() { + Some(AlgebraicValue::U64(value)) => Some(*value), + _ => None, + } + } +} diff --git a/crates/dst/src/subsystem.rs b/crates/dst/src/subsystem.rs index 299802f74f8..1978e70a9fd 100644 --- a/crates/dst/src/subsystem.rs +++ b/crates/dst/src/subsystem.rs @@ -25,10 +25,15 @@ pub trait DstSubsystem { /// Result of one fully executed deterministic run. #[derive(Clone, Debug, Eq, PartialEq)] pub struct RunRecord { + /// Human-readable subsystem name used in logs and replay diagnostics. pub subsystem: &'static str, + /// Top-level seed that produced this run. pub seed: DstSeed, + /// Full generated or loaded input case. pub case: C, + /// Optional execution trace collected while the case ran. pub trace: Option>, + /// Final target-specific outcome after execution completes. pub outcome: O, } diff --git a/crates/dst/src/targets/datastore.rs b/crates/dst/src/targets/datastore.rs index 25bd321e022..41bd1cf37db 100644 --- a/crates/dst/src/targets/datastore.rs +++ b/crates/dst/src/targets/datastore.rs @@ -1,28 +1,7 @@ -//! Randomized datastore simulator target. -//! -//! This is the highest-level subsystem in the crate: -//! -//! - generate a schema, -//! - generate a deterministic interaction stream or plan, -//! - execute the plan against a real datastore instance, -//! - compare the final committed datastore state against an in-memory model. -//! -//! The file is large, so it is easiest to read in this order: -//! -//! 1. 
case and interaction types, -//! 2. `generate_case` and `InteractionStream`, -//! 3. `run_case_detailed` / `run_generated_stream`, -//! 4. `execute_interaction`, -//! 5. `GenerationModel`, -//! 6. `ExpectedModel`. - -use std::{ - collections::{BTreeSet, VecDeque}, - fs, - path::Path, -}; +//! Randomized datastore simulator target built on the shared table workload. + +use std::path::Path; -use serde::{Deserialize, Serialize}; use spacetimedb_datastore::{ execution_context::Workload, locking_tx_datastore::{datastore::Locking, MutTxId}, @@ -34,7 +13,7 @@ use spacetimedb_lib::{ Identity, }; use spacetimedb_primitives::TableId; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; +use spacetimedb_sats::AlgebraicValue; use spacetimedb_schema::{ def::BTreeAlgorithm, schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, @@ -44,108 +23,48 @@ use spacetimedb_table::page_pool::PagePool; use crate::{ bugbase::{load_json, save_json, BugArtifact}, - seed::{DstRng, DstSeed}, - shrink::shrink_by_removing, + config::RunConfig, + schema::{SchemaPlan, SimRow}, + seed::DstSeed, subsystem::{DstSubsystem, RunRecord}, - trace::Trace, + targets::harness::{self, TableTargetHarness}, + workload::table_ops::{ + ConnectionWriteState, TableProperty, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, + TableWorkloadEvent, TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, + }, }; -/// Full input for one randomized datastore simulator run. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct DatastoreSimulatorCase { - pub seed: DstSeed, - pub num_connections: usize, - pub schema: SchemaPlan, - pub interactions: Vec, -} - -/// Generated schema for one simulator case. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct SchemaPlan { - pub tables: Vec, -} - -/// Table definition used by the simulator. 
-#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct TablePlan { - pub name: String, - pub columns: Vec, - pub secondary_index_col: Option, -} - -/// Column definition used by the simulator. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct ColumnPlan { - pub name: String, - pub kind: ColumnKind, -} - -/// Small set of column kinds currently supported by the simulator. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub enum ColumnKind { - U64, - String, - Bool, -} +pub type DatastoreScenario = TableScenarioId; +pub type DatastoreSimulatorCase = TableWorkloadCase; +pub type Interaction = TableWorkloadInteraction; +pub type DatastoreSimulatorEvent = TableWorkloadEvent; +pub type DatastoreSimulatorOutcome = TableWorkloadOutcome; +pub type DatastoreExecutionFailure = TableWorkloadExecutionFailure; +pub type DatastoreBugArtifact = BugArtifact; +pub type DatastoreRunConfig = RunConfig; +pub use crate::config::parse_duration_spec; -/// Serializable row representation used by generated interactions. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct SimRow { - pub values: Vec, -} +/// DST subsystem wrapper around the randomized datastore simulator. +pub struct DatastoreSimulatorSubsystem; -/// Serializable cell value used by generated interactions. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub enum SimValue { - U64(u64), - String(String), - Bool(bool), -} +struct DatastoreTarget; -/// One generated simulator step. -/// -/// The plan intentionally mixes mutations with immediate assertions so failures -/// are attributed to the first step that violates an invariant. 
-#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub enum Interaction { - BeginTx { conn: usize }, - CommitTx { conn: usize }, - RollbackTx { conn: usize }, - Insert { conn: usize, table: usize, row: SimRow }, - Delete { conn: usize, table: usize, row: SimRow }, - AssertVisibleInConnection { conn: usize, table: usize, row: SimRow }, - AssertMissingInConnection { conn: usize, table: usize, row: SimRow }, - AssertVisibleFresh { table: usize, row: SimRow }, - AssertMissingFresh { table: usize, row: SimRow }, - AssertRowCountFresh { table: usize, expected: u64 }, -} +impl TableTargetHarness for DatastoreTarget { + type Engine = DatastoreEngine; -/// Trace event for the datastore simulator. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub enum DatastoreSimulatorEvent { - Executed(Interaction), -} + fn target_name() -> &'static str { + DatastoreSimulatorSubsystem::name() + } -/// Final state collected from the datastore after the run. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct DatastoreSimulatorOutcome { - pub final_row_counts: Vec, - pub final_rows: Vec>, -} + fn connection_seed_discriminator() -> u64 { + 17 + } -/// Rich failure returned by `run_case_detailed`. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct DatastoreExecutionFailure { - pub step_index: usize, - pub reason: String, - pub interaction: Interaction, + fn build_engine(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { + DatastoreEngine::new(schema, num_connections) + } } -pub type DatastoreBugArtifact = BugArtifact; - -/// DST subsystem wrapper around the randomized datastore simulator. 
-pub struct DatastoreSimulatorSubsystem; - impl DstSubsystem for DatastoreSimulatorSubsystem { type Case = DatastoreSimulatorCase; type Event = DatastoreSimulatorEvent; @@ -156,11 +75,11 @@ impl DstSubsystem for DatastoreSimulatorSubsystem { } fn generate_case(seed: DstSeed) -> Self::Case { - generate_case(seed) + harness::generate_case::(seed, DatastoreScenario::RandomCrud) } fn run_case(case: &Self::Case) -> anyhow::Result> { - run_case_detailed(case).map_err(|failure| { + harness::run_case_detailed::(case).map_err(|failure| { anyhow::anyhow!( "datastore simulator failed at step {}: {}", failure.step_index, @@ -170,149 +89,56 @@ impl DstSubsystem for DatastoreSimulatorSubsystem { } } -/// Generates a deterministic simulator case from a seed. pub fn generate_case(seed: DstSeed) -> DatastoreSimulatorCase { - let mut rng = seed.fork(17).rng(); - let num_connections = rng.index(3) + 1; - let schema = generate_schema(&mut rng); - let interactions = - InteractionStream::new(seed, schema.clone(), num_connections, default_target_ops(&mut rng)).collect(); - DatastoreSimulatorCase { - seed, - num_connections, - schema, - interactions, - } + generate_case_for_scenario(seed, DatastoreScenario::RandomCrud) +} + +pub fn generate_case_for_scenario(seed: DstSeed, scenario: DatastoreScenario) -> DatastoreSimulatorCase { + harness::generate_case::(seed, scenario) +} + +pub fn materialize_case(seed: DstSeed, scenario: DatastoreScenario, max_interactions: usize) -> DatastoreSimulatorCase { + harness::materialize_case::(seed, scenario, max_interactions) } -/// Executes a generated case and returns either a full run record or the first -/// failing interaction. 
pub fn run_case_detailed( case: &DatastoreSimulatorCase, ) -> Result< RunRecord, DatastoreExecutionFailure, > { - run_interactions( - case.seed, - case.schema.clone(), - case.num_connections, - case.interactions.iter().cloned(), - Some(case.clone()), - ) + harness::run_case_detailed::(case) } -/// Executes a generated simulator workload without first materializing all -/// interactions in memory. pub fn run_generated_stream(seed: DstSeed, max_interactions: usize) -> anyhow::Result { - let mut rng = seed.fork(17).rng(); - let num_connections = rng.index(3) + 1; - let schema = generate_schema(&mut rng); - let stream = InteractionStream::new(seed, schema.clone(), num_connections, max_interactions); - let datastore = bootstrap_datastore()?; - let table_ids = install_schema(&datastore, &schema)?; - let mut execution = ExecutionState::new(num_connections); - let mut expected = ExpectedModel::new(table_ids.len(), num_connections); - - for (step_index, interaction) in stream.enumerate() { - execute_interaction(&datastore, &table_ids, &mut execution, &interaction).map_err(|reason| { - anyhow::anyhow!("datastore simulator failed at step {step_index}: {reason}") - })?; - expected.apply(&interaction); - } - - execution.rollback_all(&datastore); - - let outcome = collect_outcome(&datastore, &table_ids)?; - let expected_rows = expected.committed_rows(); - if outcome.final_rows != expected_rows { - anyhow::bail!( - "final datastore state mismatch: expected={expected_rows:?} actual={:?}", - outcome.final_rows - ); - } - - Ok(outcome) + run_generated_with_config(seed, DatastoreRunConfig::with_max_interactions(max_interactions)) } -fn run_interactions( +pub fn run_generated_with_config( seed: DstSeed, - schema: SchemaPlan, - num_connections: usize, - interactions: impl IntoIterator, - case_override: Option, -) -> Result< - RunRecord, - DatastoreExecutionFailure, -> { - let datastore = bootstrap_datastore().map_err(|err| failure_without_step(format!("bootstrap failed: {err}")))?; - 
let table_ids = install_schema(&datastore, &schema) - .map_err(|err| failure_without_step(format!("schema install failed: {err}")))?; - let mut trace = Trace::default(); - let mut execution = ExecutionState::new(num_connections); - let mut expected = ExpectedModel::new(table_ids.len(), num_connections); - let mut executed_interactions = Vec::new(); - - for (step_index, interaction) in interactions.into_iter().enumerate() { - trace.push(DatastoreSimulatorEvent::Executed(interaction.clone())); - execute_interaction(&datastore, &table_ids, &mut execution, &interaction).map_err(|reason| { - DatastoreExecutionFailure { - step_index, - reason, - interaction: interaction.clone(), - } - })?; - expected.apply(&interaction); - executed_interactions.push(interaction); - } - - execution.rollback_all(&datastore); - - let outcome = collect_outcome(&datastore, &table_ids) - .map_err(|err| failure_without_step(format!("collect outcome failed: {err}")))?; - let expected_rows = expected.committed_rows(); - if outcome.final_rows != expected_rows { - return Err(failure_without_step(format!( - "final datastore state mismatch: expected={expected_rows:?} actual={:?}", - outcome.final_rows - ))); - } + config: DatastoreRunConfig, +) -> anyhow::Result { + run_generated_with_config_and_scenario(seed, DatastoreScenario::RandomCrud, config) +} - let case = case_override.unwrap_or(DatastoreSimulatorCase { - seed, - num_connections, - schema, - interactions: executed_interactions, - }); - - Ok(RunRecord { - subsystem: DatastoreSimulatorSubsystem::name(), - seed, - case, - trace: Some(trace), - outcome, - }) +pub fn run_generated_with_config_and_scenario( + seed: DstSeed, + scenario: DatastoreScenario, + config: DatastoreRunConfig, +) -> anyhow::Result { + harness::run_generated_with_config_and_scenario::(seed, scenario, config) } -/// Saves a simulator case as JSON for replay or debugging. 
pub fn save_case(path: impl AsRef, case: &DatastoreSimulatorCase) -> anyhow::Result<()> { - let body = serde_json::to_string_pretty(case)?; - fs::write(path, body)?; - Ok(()) + harness::save_case(path, case) } -/// Loads a simulator case previously written by [`save_case`]. pub fn load_case(path: impl AsRef) -> anyhow::Result { - let body = fs::read_to_string(path)?; - Ok(serde_json::from_str(&body)?) + harness::load_case(path) } -/// Runs a case and extracts only the failure reason. pub fn failure_reason(case: &DatastoreSimulatorCase) -> anyhow::Result { - match run_case_detailed(case) { - Ok(_) => anyhow::bail!("case did not fail"), - Err(failure) => Ok(failure.reason), - } + harness::failure_reason::(case) } pub fn save_bug_artifact(path: impl AsRef, artifact: &DatastoreBugArtifact) -> anyhow::Result<()> { @@ -327,76 +153,266 @@ pub fn shrink_failure( case: &DatastoreSimulatorCase, failure: &DatastoreExecutionFailure, ) -> anyhow::Result { - shrink_by_removing( - case, - failure, - |case| { - let mut shrunk = case.clone(); - shrunk.interactions.truncate(failure.step_index.saturating_add(1)); - shrunk - }, - |case| case.interactions.len(), - remove_interaction, - |case| match run_case_detailed(case) { - Ok(_) => anyhow::bail!("case did not fail"), - Err(failure) => Ok(failure), - }, - |expected, candidate| expected.reason == candidate.reason, - ) + harness::shrink_failure::(case, failure) } -fn remove_interaction(case: &DatastoreSimulatorCase, idx: usize) -> Option { - let interaction = case.interactions.get(idx)?; - if matches!( - interaction, - Interaction::CommitTx { .. } | Interaction::RollbackTx { .. } - ) { - return None; +/// Concrete datastore execution harness for the shared table workload. 
+struct DatastoreEngine { + datastore: Locking, + table_ids: Vec, + execution: ConnectionWriteState, +} + +impl DatastoreEngine { + fn new(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { + let datastore = bootstrap_datastore()?; + let table_ids = install_schema(&datastore, schema)?; + Ok(Self { + datastore, + table_ids, + execution: ConnectionWriteState::new(num_connections), + }) } - let mut interactions = case.interactions.clone(); - interactions.remove(idx); - Some(DatastoreSimulatorCase { - seed: case.seed, - num_connections: case.num_connections, - schema: case.schema.clone(), - interactions, - }) + fn with_mut_tx( + &mut self, + conn: usize, + table: usize, + mut f: impl FnMut(&Locking, TableId, &mut MutTxId) -> Result<(), String>, + ) -> Result<(), String> { + let table_id = *self + .table_ids + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + self.execution.ensure_known_connection(conn)?; + let slot = &mut self.execution.tx_by_connection[conn]; + + match slot { + Some(tx) => f(&self.datastore, table_id, tx), + None => { + if let Some(owner) = self.execution.active_writer { + return Err(format!( + "connection {conn} cannot auto-commit write while connection {owner} owns lock" + )); + } + let mut tx = self + .datastore + .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + self.execution.active_writer = Some(conn); + f(&self.datastore, table_id, &mut tx)?; + self.datastore + .commit_mut_tx(tx) + .map_err(|err| format!("auto-commit failed on connection {conn}: {err}"))?; + self.execution.active_writer = None; + Ok(()) + } + } + } + + fn fresh_lookup(&self, table_id: TableId, id: u64) -> anyhow::Result> { + let tx = self.datastore.begin_tx(Workload::ForTests); + Ok(tx + .table_scan(table_id)? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .find(|row| row.id() == Some(id))) + } + + fn collect_rows_for_table(&self, table: usize) -> anyhow::Result> { + let table_id = *self + .table_ids + .get(table) + .ok_or_else(|| anyhow::anyhow!("table {table} out of range"))?; + let tx = self.datastore.begin_tx(Workload::ForTests); + let mut rows = tx + .table_scan(table_id)? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + } } -fn generate_schema(rng: &mut DstRng) -> SchemaPlan { - let table_count = rng.index(3) + 1; - let mut tables = Vec::with_capacity(table_count); - - for table_idx in 0..table_count { - let extra_cols = rng.index(3); - let mut columns = vec![ColumnPlan { - name: "id".into(), - kind: ColumnKind::U64, - }]; - for col_idx in 0..extra_cols { - columns.push(ColumnPlan { - name: format!("c{table_idx}_{col_idx}"), - kind: match rng.index(3) { - 0 => ColumnKind::U64, - 1 => ColumnKind::String, - _ => ColumnKind::Bool, - }, - }); +impl TableWorkloadEngine for DatastoreEngine { + fn execute(&mut self, interaction: &Interaction) -> Result<(), String> { + match interaction { + Interaction::BeginTx { conn } => { + self.execution.ensure_known_connection(*conn)?; + if self.execution.tx_by_connection[*conn].is_some() { + return Err(format!("connection {conn} already has open transaction")); + } + if let Some(owner) = self.execution.active_writer { + return Err(format!( + "connection {conn} cannot begin write transaction while connection {owner} owns lock" + )); + } + self.execution.tx_by_connection[*conn] = Some( + self.datastore + .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests), + ); + self.execution.active_writer = Some(*conn); + } + Interaction::CommitTx { conn } => { + self.execution.ensure_writer_owner(*conn, "commit")?; + let tx = self.execution.tx_by_connection[*conn] + .take() + .ok_or_else(|| 
format!("connection {conn} has no transaction to commit"))?; + self.datastore + .commit_mut_tx(tx) + .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; + self.execution.active_writer = None; + } + Interaction::RollbackTx { conn } => { + self.execution.ensure_writer_owner(*conn, "rollback")?; + let tx = self.execution.tx_by_connection[*conn] + .take() + .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; + let _ = self.datastore.rollback_mut_tx(tx); + self.execution.active_writer = None; + } + Interaction::Insert { conn, table, row } => { + self.with_mut_tx(*conn, *table, |datastore, table_id, tx| { + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + datastore + .insert_mut_tx(tx, table_id, &bsatn) + .map_err(|err| format!("insert failed: {err}"))?; + Ok(()) + })?; + } + Interaction::Delete { conn, table, row } => { + self.with_mut_tx(*conn, *table, |datastore, table_id, tx| { + let deleted = datastore.delete_by_rel_mut_tx(tx, table_id, [row.to_product_value()]); + if deleted != 1 { + return Err(format!("delete expected 1 row, got {deleted}")); + } + Ok(()) + })?; + } + Interaction::Check(TableProperty::VisibleInConnection { conn, table, row }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { + self.datastore + .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("in-tx lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .any(|candidate| candidate == *row) + } else { + self.fresh_lookup(table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}"))? 
+ == Some(row.clone()) + }; + if !found { + return Err(format!("row not visible in connection after write: {row:?}")); + } + } + Interaction::Check(TableProperty::MissingInConnection { conn, table, row }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { + self.datastore + .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("in-tx lookup failed: {err}"))? + .next() + .is_some() + } else { + self.fresh_lookup(table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}"))? + .is_some() + }; + if found { + return Err(format!("row still visible in connection after delete: {row:?}")); + } + } + Interaction::Check(TableProperty::VisibleFresh { table, row }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = self + .fresh_lookup(table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}"))?; + if found != Some(row.clone()) { + return Err(format!("fresh lookup mismatch: expected={row:?} actual={found:?}")); + } + } + Interaction::Check(TableProperty::MissingFresh { table, row }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + if self + .fresh_lookup(table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}"))? 
+ .is_some() + { + return Err(format!("fresh lookup still found deleted row: {row:?}")); + } + } + Interaction::Check(TableProperty::RowCountFresh { table, expected }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let actual = self.datastore.begin_tx(Workload::ForTests).row_count(table_id); + if actual != *expected { + return Err(format!("row count mismatch: expected={expected} actual={actual}")); + } + } + Interaction::Check(TableProperty::TablesMatchFresh { left, right }) => { + let left_rows = self + .collect_rows_for_table(*left) + .map_err(|err| format!("left table collect failed: {err}"))?; + let right_rows = self + .collect_rows_for_table(*right) + .map_err(|err| format!("right table collect failed: {err}"))?; + if left_rows != right_rows { + return Err(format!( + "fresh table mismatch: left_table={left} right_table={right} left={left_rows:?} right={right_rows:?}" + )); + } + } } - let secondary_index_col = (columns.len() > 1 && rng.index(100) < 50).then_some(1); - tables.push(TablePlan { - name: format!("dst_table_{table_idx}_{}", rng.next_u64() % 10_000), - columns, - secondary_index_col, - }); + + Ok(()) } - SchemaPlan { tables } -} + fn collect_outcome(&mut self) -> anyhow::Result { + let tx = self.datastore.begin_tx(Workload::ForTests); + let mut final_rows = Vec::with_capacity(self.table_ids.len()); + let mut final_row_counts = Vec::with_capacity(self.table_ids.len()); + + for &table_id in &self.table_ids { + let mut rows = tx + .table_scan(table_id)? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + final_row_counts.push(rows.len() as u64); + final_rows.push(rows); + } + + Ok(DatastoreSimulatorOutcome { + final_row_counts, + final_rows, + }) + } -fn default_target_ops(rng: &mut DstRng) -> usize { - 24 + rng.index(24) + fn finish(&mut self) { + for tx in &mut self.execution.tx_by_connection { + if let Some(tx) = tx.take() { + let _ = self.datastore.rollback_mut_tx(tx); + } + } + self.execution.active_writer = None; + } } fn bootstrap_datastore() -> spacetimedb_datastore::Result { @@ -412,7 +428,7 @@ fn install_schema(datastore: &Locking, schema: &SchemaPlan) -> anyhow::Result>(); let mut indexes = vec![IndexSchema::for_test( @@ -455,690 +471,30 @@ fn install_schema(datastore: &Locking, schema: &SchemaPlan) -> anyhow::Result Result<(), String> { - match interaction { - Interaction::BeginTx { conn } => { - execution.ensure_known_connection(*conn)?; - if execution.tx_by_connection[*conn].is_some() { - return Err(format!("connection {conn} already has open transaction")); - } - if let Some(owner) = execution.active_writer { - return Err(format!( - "connection {conn} cannot begin write transaction while connection {owner} owns lock" - )); - } - execution.tx_by_connection[*conn] = - Some(datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests)); - execution.active_writer = Some(*conn); - } - Interaction::CommitTx { conn } => { - execution.ensure_writer_owner(*conn, "commit")?; - let tx = execution.tx_by_connection[*conn] - .take() - .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; - datastore - .commit_mut_tx(tx) - .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; - execution.active_writer = None; - } - Interaction::RollbackTx { conn } => { - execution.ensure_writer_owner(*conn, "rollback")?; - let tx = execution.tx_by_connection[*conn] - .take() - 
.ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; - let _ = datastore.rollback_mut_tx(tx); - execution.active_writer = None; - } - Interaction::Insert { conn, table, row } => { - with_mut_tx( - datastore, - table_ids, - execution, - *conn, - *table, - |datastore, table_id, tx| { - let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; - datastore - .insert_mut_tx(tx, table_id, &bsatn) - .map_err(|err| format!("insert failed: {err}"))?; - Ok(()) - }, - )?; - } - Interaction::Delete { conn, table, row } => { - with_mut_tx( - datastore, - table_ids, - execution, - *conn, - *table, - |datastore, table_id, tx| { - let deleted = datastore.delete_by_rel_mut_tx(tx, table_id, [row.to_product_value()]); - if deleted != 1 { - return Err(format!("delete expected 1 row, got {deleted}")); - } - Ok(()) - }, - )?; - } - Interaction::AssertVisibleInConnection { conn, table, row } => { - let table_id = *table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = if let Some(Some(tx)) = execution.tx_by_connection.get(*conn) { - datastore - .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) - .map_err(|err| format!("in-tx lookup failed: {err}"))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .any(|candidate| candidate == *row) - } else { - fresh_lookup(datastore, table_id, id).map_err(|err| format!("fresh lookup failed: {err}"))? 
- == Some(row.clone()) - }; - if !found { - return Err(format!("row not visible in connection after write: {row:?}")); - } - } - Interaction::AssertMissingInConnection { conn, table, row } => { - let table_id = *table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = if let Some(Some(tx)) = execution.tx_by_connection.get(*conn) { - datastore - .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) - .map_err(|err| format!("in-tx lookup failed: {err}"))? - .next() - .is_some() - } else { - fresh_lookup(datastore, table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}"))? - .is_some() - }; - if found { - return Err(format!("row still visible in connection after delete: {row:?}")); - } - } - Interaction::AssertVisibleFresh { table, row } => { - let table_id = *table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = fresh_lookup(datastore, table_id, id).map_err(|err| format!("fresh lookup failed: {err}"))?; - if found != Some(row.clone()) { - return Err(format!("fresh lookup mismatch: expected={row:?} actual={found:?}")); - } - } - Interaction::AssertMissingFresh { table, row } => { - let table_id = *table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - if fresh_lookup(datastore, table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}"))? 
- .is_some() - { - return Err(format!("fresh lookup still found deleted row: {row:?}")); - } - } - Interaction::AssertRowCountFresh { table, expected } => { - let table_id = *table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let actual = datastore.begin_tx(Workload::ForTests).row_count(table_id); - if actual != *expected { - return Err(format!("row count mismatch: expected={expected} actual={actual}")); - } - } - } - - Ok(()) -} - -fn with_mut_tx( - datastore: &Locking, - table_ids: &[TableId], - execution: &mut ExecutionState, - conn: usize, - table: usize, - mut f: impl FnMut(&Locking, TableId, &mut MutTxId) -> Result<(), String>, -) -> Result<(), String> { - let table_id = *table_ids - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - execution.ensure_known_connection(conn)?; - let slot = &mut execution.tx_by_connection[conn]; - - match slot { - Some(tx) => f(datastore, table_id, tx), - None => { - if let Some(owner) = execution.active_writer { - return Err(format!( - "connection {conn} cannot auto-commit write while connection {owner} owns lock" - )); - } - let mut tx = datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); - execution.active_writer = Some(conn); - f(datastore, table_id, &mut tx)?; - datastore - .commit_mut_tx(tx) - .map_err(|err| format!("auto-commit failed on connection {conn}: {err}"))?; - execution.active_writer = None; - Ok(()) - } - } -} - -fn fresh_lookup(datastore: &Locking, table_id: TableId, id: u64) -> anyhow::Result> { - let tx = datastore.begin_tx(Workload::ForTests); - Ok(tx - .table_scan(table_id)? 
- .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .find(|row| row.id() == Some(id))) -} - -fn collect_outcome(datastore: &Locking, table_ids: &[TableId]) -> anyhow::Result { - let tx = datastore.begin_tx(Workload::ForTests); - let mut final_rows = Vec::with_capacity(table_ids.len()); - let mut final_row_counts = Vec::with_capacity(table_ids.len()); - - for &table_id in table_ids { - let mut rows = tx - .table_scan(table_id)? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); - final_row_counts.push(rows.len() as u64); - final_rows.push(rows); - } - - Ok(DatastoreSimulatorOutcome { - final_row_counts, - final_rows, - }) -} - -fn failure_without_step(reason: String) -> DatastoreExecutionFailure { - DatastoreExecutionFailure { - step_index: usize::MAX, - reason, - interaction: Interaction::AssertRowCountFresh { - table: usize::MAX, - expected: 0, - }, - } -} - -impl ColumnKind { - fn to_algebraic_type(&self) -> AlgebraicType { - match self { - ColumnKind::U64 => AlgebraicType::U64, - ColumnKind::String => AlgebraicType::String, - ColumnKind::Bool => AlgebraicType::Bool, - } - } -} - -impl SimValue { - fn to_algebraic_value(&self) -> AlgebraicValue { - match self { - SimValue::U64(value) => AlgebraicValue::U64(*value), - SimValue::String(value) => AlgebraicValue::String(value.clone().into()), - SimValue::Bool(value) => AlgebraicValue::Bool(*value), - } - } - - fn from_algebraic_value(value: AlgebraicValue) -> Self { - match value { - AlgebraicValue::U64(value) => SimValue::U64(value), - AlgebraicValue::String(value) => SimValue::String(value.to_string()), - AlgebraicValue::Bool(value) => SimValue::Bool(value), - other => panic!("unsupported value in simulator row: {other:?}"), - } - } -} - -impl SimRow { - fn to_product_value(&self) -> ProductValue { - ProductValue::from_iter(self.values.iter().map(SimValue::to_algebraic_value)) - } - - fn 
to_bsatn(&self) -> anyhow::Result> { - Ok(spacetimedb_sats::bsatn::to_vec(&self.to_product_value())?) - } - - fn from_product_value(value: ProductValue) -> Self { - SimRow { - values: value.elements.into_iter().map(SimValue::from_algebraic_value).collect(), - } - } - - fn id(&self) -> Option { - match self.values.first() { - Some(SimValue::U64(value)) => Some(*value), - _ => None, - } - } -} - -struct ExecutionState { - tx_by_connection: Vec>, - active_writer: Option, -} - -impl ExecutionState { - fn new(connection_count: usize) -> Self { - Self { - tx_by_connection: (0..connection_count).map(|_| None).collect(), - active_writer: None, - } - } - - fn ensure_known_connection(&self, conn: usize) -> Result<(), String> { - self.tx_by_connection - .get(conn) - .map(|_| ()) - .ok_or_else(|| format!("connection {conn} out of range")) - } - - fn ensure_writer_owner(&self, conn: usize, action: &str) -> Result<(), String> { - self.ensure_known_connection(conn)?; - match self.active_writer { - Some(owner) if owner == conn => Ok(()), - Some(owner) => Err(format!( - "connection {conn} cannot {action} while connection {owner} owns lock" - )), - None => Err(format!("connection {conn} has no transaction to {action}")), - } - } - - fn rollback_all(&mut self, datastore: &Locking) { - for tx in &mut self.tx_by_connection { - if let Some(tx) = tx.take() { - let _ = datastore.rollback_mut_tx(tx); - } - } - self.active_writer = None; - } -} - -#[derive(Clone, Debug)] -struct InteractionStream { - rng: DstRng, - model: GenerationModel, - num_connections: usize, - target_interactions: usize, - emitted: usize, - finalize_conn: usize, - pending: VecDeque, - finished: bool, -} - -impl InteractionStream { - fn new(seed: DstSeed, schema: SchemaPlan, num_connections: usize, target_interactions: usize) -> Self { - Self { - rng: seed.fork(17).rng(), - model: GenerationModel::new(&schema, num_connections, seed), - num_connections, - target_interactions, - emitted: 0, - finalize_conn: 0, - pending: 
VecDeque::new(), - finished: false, - } - } - - fn fill_pending(&mut self) { - if self.emitted >= self.target_interactions { - while self.finalize_conn < self.num_connections { - let conn = self.finalize_conn; - self.finalize_conn += 1; - if self.model.connections[conn].in_tx { - let followups = self.model.commit(conn); - self.pending.push_back(Interaction::CommitTx { conn }); - self.pending.extend(followups); - return; - } - } - self.finished = true; - return; - } - - let conn = self - .model - .active_writer() - .unwrap_or_else(|| self.rng.index(self.num_connections)); - - if !self.model.connections[conn].in_tx && self.model.active_writer().is_none() && self.rng.index(100) < 20 { - self.model.begin_tx(conn); - self.pending.push_back(Interaction::BeginTx { conn }); - return; - } - - if self.model.connections[conn].in_tx && self.rng.index(100) < 15 { - let followups = self.model.commit(conn); - self.pending.push_back(Interaction::CommitTx { conn }); - self.pending.extend(followups); - return; - } - - if self.model.connections[conn].in_tx && self.rng.index(100) < 10 { - let followups = self.model.rollback(conn); - self.pending.push_back(Interaction::RollbackTx { conn }); - self.pending.extend(followups); - return; - } - - let table = self.rng.index(self.model.schema.tables.len()); - let visible_rows = self.model.visible_rows(conn, table); - let choose_insert = visible_rows.is_empty() || self.rng.index(100) < 65; - if choose_insert { - let row = self.model.make_row(&mut self.rng, table); - self.model.insert(conn, table, row.clone()); - self.pending.push_back(Interaction::Insert { - conn, - table, - row: row.clone(), - }); - self.pending.push_back(Interaction::AssertVisibleInConnection { conn, table, row }); - if !self.model.connections[conn].in_tx { - let row = self.model.last_inserted_row(conn).expect("tracked auto-commit insert"); - self.pending.push_back(Interaction::AssertVisibleFresh { table, row }); - } - return; - } - - let row = 
visible_rows[self.rng.index(visible_rows.len())].clone(); - self.model.delete(conn, table, row.clone()); - self.pending.push_back(Interaction::Delete { - conn, - table, - row: row.clone(), - }); - self.pending.push_back(Interaction::AssertMissingInConnection { - conn, - table, - row: row.clone(), - }); - if !self.model.connections[conn].in_tx { - self.pending.push_back(Interaction::AssertMissingFresh { table, row }); - } - } -} - -impl Iterator for InteractionStream { - type Item = Interaction; - - fn next(&mut self) -> Option { - loop { - if let Some(interaction) = self.pending.pop_front() { - self.emitted += 1; - return Some(interaction); - } - - if self.finished { - return None; - } - - self.fill_pending(); - } - } -} - -#[derive(Clone, Debug)] -struct GenerationModel { - schema: SchemaPlan, - connections: Vec, - committed: Vec>, - next_ids: Vec, - active_writer: Option, -} - -#[derive(Clone, Debug, Default)] -struct PendingConnection { - in_tx: bool, - staged_inserts: Vec<(usize, SimRow)>, - staged_deletes: Vec<(usize, SimRow)>, - last_auto_committed_insert: Option, -} - -impl GenerationModel { - fn new(schema: &SchemaPlan, num_connections: usize, seed: DstSeed) -> Self { - Self { - schema: schema.clone(), - connections: vec![PendingConnection::default(); num_connections], - committed: vec![Vec::new(); schema.tables.len()], - next_ids: (0..schema.tables.len()) - .map(|idx| seed.fork(idx as u64 + 100).0) - .collect(), - active_writer: None, - } - } - - fn make_row(&mut self, rng: &mut DstRng, table: usize) -> SimRow { - let table_plan = &self.schema.tables[table]; - let id = self.next_ids[table]; - self.next_ids[table] = self.next_ids[table].wrapping_add(1).max(1); - let mut values = vec![SimValue::U64(id)]; - for (idx, col) in table_plan.columns.iter().enumerate().skip(1) { - values.push(match col.kind { - ColumnKind::U64 => SimValue::U64((rng.next_u64() % 1000) + idx as u64), - ColumnKind::String => SimValue::String(format!("v{}_{}", idx, rng.next_u64() % 
10_000)), - ColumnKind::Bool => SimValue::Bool(rng.index(2) == 0), - }); - } - SimRow { values } - } - - fn visible_rows(&self, conn: usize, table: usize) -> Vec { - let mut rows = self.committed[table].clone(); - let pending = &self.connections[conn]; - for (pending_table, row) in &pending.staged_deletes { - if *pending_table == table { - rows.retain(|candidate| candidate != row); - } - } - for (pending_table, row) in &pending.staged_inserts { - if *pending_table == table { - rows.push(row.clone()); - } - } - rows - } - - fn active_writer(&self) -> Option { - self.active_writer - } - - fn begin_tx(&mut self, conn: usize) { - assert!(self.active_writer.is_none(), "single writer already active"); - let pending = &mut self.connections[conn]; - assert!(!pending.in_tx, "connection already in transaction"); - pending.in_tx = true; - self.active_writer = Some(conn); - } - - fn insert(&mut self, conn: usize, table: usize, row: SimRow) { - let pending = &mut self.connections[conn]; - if pending.in_tx { - pending.staged_inserts.push((table, row)); - } else { - self.committed[table].push(row.clone()); - pending.last_auto_committed_insert = Some(row); - } - } - - fn last_inserted_row(&self, conn: usize) -> Option { - self.connections[conn].last_auto_committed_insert.clone() - } - - fn delete(&mut self, conn: usize, table: usize, row: SimRow) { - let pending = &mut self.connections[conn]; - if pending.in_tx { - pending - .staged_inserts - .retain(|(pending_table, candidate)| !(*pending_table == table && *candidate == row)); - pending.staged_deletes.push((table, row)); - } else { - self.committed[table].retain(|candidate| *candidate != row); - } - } - - fn commit(&mut self, conn: usize) -> Vec { - let pending = &mut self.connections[conn]; - let inserts = std::mem::take(&mut pending.staged_inserts); - let deletes = std::mem::take(&mut pending.staged_deletes); - pending.in_tx = false; - self.active_writer = None; - - for (table, row) in &deletes { - 
self.committed[*table].retain(|candidate| candidate != row); - } - for (table, row) in &inserts { - self.committed[*table].push(row.clone()); - } - - let mut followups = Vec::new(); - for (table, row) in inserts { - followups.push(Interaction::AssertVisibleFresh { table, row }); - } - for (table, row) in deletes { - followups.push(Interaction::AssertMissingFresh { table, row }); - } - followups - } - - fn rollback(&mut self, conn: usize) -> Vec { - let pending = &mut self.connections[conn]; - let touched_tables = pending - .staged_inserts - .iter() - .chain(pending.staged_deletes.iter()) - .map(|(table, _)| *table) - .collect::>(); - pending.staged_inserts.clear(); - pending.staged_deletes.clear(); - pending.in_tx = false; - self.active_writer = None; - touched_tables - .into_iter() - .map(|table| Interaction::AssertRowCountFresh { - table, - expected: self.committed[table].len() as u64, - }) - .collect() - } -} - -#[derive(Clone, Debug)] -struct ExpectedModel { - committed: Vec>, - connections: Vec, - active_writer: Option, -} - -#[derive(Clone, Debug, Default)] -struct ExpectedConnection { - in_tx: bool, - staged_inserts: Vec<(usize, SimRow)>, - staged_deletes: Vec<(usize, SimRow)>, -} - -impl ExpectedModel { - fn new(table_count: usize, connection_count: usize) -> Self { - Self { - committed: vec![Vec::new(); table_count], - connections: vec![ExpectedConnection::default(); connection_count], - active_writer: None, - } - } - - fn apply(&mut self, interaction: &Interaction) { - match interaction { - Interaction::BeginTx { conn } => { - assert!(self.active_writer.is_none(), "multiple concurrent writers in expected model"); - self.connections[*conn].in_tx = true; - self.active_writer = Some(*conn); - } - Interaction::CommitTx { conn } => { - assert_eq!(self.active_writer, Some(*conn), "commit by non-owner in expected model"); - let state = &mut self.connections[*conn]; - for (table, row) in state.staged_deletes.drain(..) 
{ - self.committed[table].retain(|candidate| *candidate != row); - } - for (table, row) in state.staged_inserts.drain(..) { - self.committed[table].push(row); - } - state.in_tx = false; - self.active_writer = None; - } - Interaction::RollbackTx { conn } => { - assert_eq!(self.active_writer, Some(*conn), "rollback by non-owner in expected model"); - let state = &mut self.connections[*conn]; - state.staged_inserts.clear(); - state.staged_deletes.clear(); - state.in_tx = false; - self.active_writer = None; - } - Interaction::Insert { conn, table, row } => { - let state = &mut self.connections[*conn]; - if state.in_tx { - state.staged_inserts.push((*table, row.clone())); - } else { - self.committed[*table].push(row.clone()); - } - } - Interaction::Delete { conn, table, row } => { - let state = &mut self.connections[*conn]; - if state.in_tx { - state - .staged_inserts - .retain(|(pending_table, candidate)| !(*pending_table == *table && *candidate == *row)); - state.staged_deletes.push((*table, row.clone())); - } else { - self.committed[*table].retain(|candidate| *candidate != *row); - } - } - Interaction::AssertVisibleInConnection { .. } - | Interaction::AssertMissingInConnection { .. } - | Interaction::AssertVisibleFresh { .. } - | Interaction::AssertMissingFresh { .. } - | Interaction::AssertRowCountFresh { .. 
} => {} - } - } - - fn committed_rows(mut self) -> Vec> { - for table_rows in &mut self.committed { - table_rows.sort_by_key(|row| row.id().unwrap_or_default()); - } - self.committed - } -} - #[cfg(test)] mod tests { - use std::sync::{Mutex, OnceLock}; + use std::{ + sync::{Mutex, OnceLock}, + time::Duration, + }; use pretty_assertions::assert_eq; use proptest::prelude::*; + use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; use tempfile::tempdir; use crate::{ runner::{rerun_case, run_generated, verify_repeatable_execution}, + schema::{ColumnPlan, TablePlan}, seed::DstSeed, }; use super::{ - failure_reason, generate_case, load_bug_artifact, run_case_detailed, run_generated_stream, save_bug_artifact, - shrink_failure, ColumnKind, ColumnPlan, DatastoreBugArtifact, DatastoreSimulatorCase, - DatastoreSimulatorSubsystem, Interaction, SchemaPlan, SimRow, SimValue, TablePlan, + failure_reason, generate_case, generate_case_for_scenario, load_bug_artifact, parse_duration_spec, + run_case_detailed, save_bug_artifact, shrink_failure, DatastoreBugArtifact, DatastoreScenario, + DatastoreSimulatorCase, DatastoreSimulatorSubsystem, Interaction, SchemaPlan, SimRow, }; + use crate::workload::table_ops::TableProperty; fn test_lock() -> &'static Mutex<()> { static LOCK: OnceLock> = OnceLock::new(); @@ -1185,9 +541,22 @@ mod tests { } #[test] - fn streamed_runner_supports_long_cases() { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - run_generated_stream(DstSeed(1234), 10_000).expect("run long streamed datastore simulator case"); + fn duration_specs_parse() { + assert_eq!(parse_duration_spec("5m").expect("parse 5m"), Duration::from_secs(300)); + assert_eq!(parse_duration_spec("2s").expect("parse 2s"), Duration::from_secs(2)); + assert_eq!( + parse_duration_spec("10ms").expect("parse 10ms"), + Duration::from_millis(10) + ); + } + + #[test] + fn banking_generation_uses_fixed_schema() { + let case = generate_case_for_scenario(DstSeed(9090), 
DatastoreScenario::Banking); + assert_eq!(case.scenario, DatastoreScenario::Banking); + assert_eq!(case.schema.tables.len(), 2); + assert_eq!(case.schema.tables[0].name, "debit_accounts"); + assert_eq!(case.schema.tables[1].name, "credit_accounts"); } #[test] @@ -1206,17 +575,18 @@ mod tests { assert_eq!(owner, Some(conn), "non-owner closed writer"); owner = None; } - Interaction::Insert { conn, .. } - | Interaction::Delete { conn, .. } - | Interaction::AssertVisibleInConnection { conn, .. } - | Interaction::AssertMissingInConnection { conn, .. } => { + Interaction::Insert { conn, .. } | Interaction::Delete { conn, .. } => { + if let Some(writer) = owner { + assert_eq!(conn, writer, "interaction ran on non-owner while writer open"); + } + } + Interaction::Check(TableProperty::VisibleInConnection { conn, .. }) + | Interaction::Check(TableProperty::MissingInConnection { conn, .. }) => { if let Some(writer) = owner { assert_eq!(conn, writer, "interaction ran on non-owner while writer open"); } } - Interaction::AssertVisibleFresh { .. } - | Interaction::AssertMissingFresh { .. } - | Interaction::AssertRowCountFresh { .. 
} => {} + Interaction::Check(_) => {} } } @@ -1228,6 +598,7 @@ mod tests { let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); let case = DatastoreSimulatorCase { seed: DstSeed(88), + scenario: DatastoreScenario::RandomCrud, num_connections: 2, schema: SchemaPlan { tables: vec![TablePlan { @@ -1235,11 +606,11 @@ mod tests { columns: vec![ ColumnPlan { name: "id".into(), - kind: ColumnKind::U64, + ty: AlgebraicType::U64, }, ColumnPlan { name: "name".into(), - kind: ColumnKind::String, + ty: AlgebraicType::String, }, ], secondary_index_col: Some(1), @@ -1260,6 +631,7 @@ mod tests { let path = dir.path().join("bug.json"); let case = DatastoreSimulatorCase { seed: DstSeed(5), + scenario: DatastoreScenario::RandomCrud, num_connections: 1, schema: SchemaPlan { tables: vec![TablePlan { @@ -1267,22 +639,22 @@ mod tests { columns: vec![ ColumnPlan { name: "id".into(), - kind: ColumnKind::U64, + ty: AlgebraicType::U64, }, ColumnPlan { name: "ok".into(), - kind: ColumnKind::Bool, + ty: AlgebraicType::Bool, }, ], secondary_index_col: Some(1), }], }, - interactions: vec![Interaction::AssertVisibleFresh { + interactions: vec![Interaction::Check(TableProperty::VisibleFresh { table: 0, row: SimRow { - values: vec![SimValue::U64(7), SimValue::Bool(true)], + values: vec![AlgebraicValue::U64(7), AlgebraicValue::Bool(true)], }, - }], + })], }; let failure = run_case_detailed(&case).expect_err("case should fail"); let artifact = DatastoreBugArtifact { @@ -1302,6 +674,7 @@ mod tests { let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); let case = DatastoreSimulatorCase { seed: DstSeed(77), + scenario: DatastoreScenario::RandomCrud, num_connections: 1, schema: SchemaPlan { tables: vec![TablePlan { @@ -1309,11 +682,11 @@ mod tests { columns: vec![ ColumnPlan { name: "id".into(), - kind: ColumnKind::U64, + ty: AlgebraicType::U64, }, ColumnPlan { name: "name".into(), - kind: ColumnKind::String, + ty: AlgebraicType::String, }, ], secondary_index_col: 
Some(1), @@ -1324,26 +697,26 @@ mod tests { conn: 0, table: 0, row: SimRow { - values: vec![SimValue::U64(1), SimValue::String("one".into())], + values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], }, }, - Interaction::AssertVisibleFresh { + Interaction::Check(TableProperty::VisibleFresh { table: 0, row: SimRow { - values: vec![SimValue::U64(1), SimValue::String("one".into())], + values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], }, - }, - Interaction::AssertMissingFresh { + }), + Interaction::Check(TableProperty::MissingFresh { table: 0, row: SimRow { - values: vec![SimValue::U64(1), SimValue::String("one".into())], + values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], }, - }, + }), Interaction::Insert { conn: 0, table: 0, row: SimRow { - values: vec![SimValue::U64(2), SimValue::String("two".into())], + values: vec![AlgebraicValue::U64(2), AlgebraicValue::String("two".into())], }, }, ], @@ -1359,6 +732,7 @@ mod tests { fn failing_case() -> DatastoreSimulatorCase { DatastoreSimulatorCase { seed: DstSeed(99), + scenario: DatastoreScenario::RandomCrud, num_connections: 1, schema: SchemaPlan { tables: vec![TablePlan { @@ -1366,11 +740,11 @@ mod tests { columns: vec![ ColumnPlan { name: "id".into(), - kind: ColumnKind::U64, + ty: AlgebraicType::U64, }, ColumnPlan { name: "name".into(), - kind: ColumnKind::String, + ty: AlgebraicType::String, }, ], secondary_index_col: Some(1), @@ -1381,21 +755,21 @@ mod tests { conn: 0, table: 0, row: SimRow { - values: vec![SimValue::U64(1), SimValue::String("one".into())], + values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], }, }, - Interaction::AssertVisibleFresh { + Interaction::Check(TableProperty::VisibleFresh { table: 0, row: SimRow { - values: vec![SimValue::U64(1), SimValue::String("one".into())], + values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], }, - }, - Interaction::AssertMissingFresh { + }), + 
Interaction::Check(TableProperty::MissingFresh { table: 0, row: SimRow { - values: vec![SimValue::U64(1), SimValue::String("one".into())], + values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], }, - }, + }), ], } } diff --git a/crates/dst/src/targets/harness.rs b/crates/dst/src/targets/harness.rs new file mode 100644 index 00000000000..b4624861c4b --- /dev/null +++ b/crates/dst/src/targets/harness.rs @@ -0,0 +1,140 @@ +use std::path::Path; + +use crate::{ + bugbase::{load_json, save_json}, + config::RunConfig, + schema::SchemaPlan, + seed::DstSeed, + shrink::shrink_by_removing, + subsystem::RunRecord, + trace::Trace, + workload::table_ops::{ + default_target_ops, execute_interactions, run_generated_with_engine, InteractionStream, TableScenario, + TableScenarioId, TableWorkloadCase, TableWorkloadEngine, TableWorkloadEvent, TableWorkloadExecutionFailure, + TableWorkloadOutcome, + }, +}; + +pub trait TableTargetHarness { + type Engine: TableWorkloadEngine; + + fn target_name() -> &'static str; + fn connection_seed_discriminator() -> u64; + fn build_engine(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result; + + fn can_remove_interaction(interaction: &crate::workload::table_ops::TableWorkloadInteraction) -> bool { + !matches!( + interaction, + crate::workload::table_ops::TableWorkloadInteraction::CommitTx { .. } + | crate::workload::table_ops::TableWorkloadInteraction::RollbackTx { .. 
} + ) + } +} + +pub fn materialize_case( + seed: DstSeed, + scenario: TableScenarioId, + max_interactions: usize, +) -> TableWorkloadCase { + let mut rng = seed.fork(T::connection_seed_discriminator()).rng(); + let num_connections = rng.index(3) + 1; + let schema = scenario.generate_schema(&mut rng); + let interactions = + InteractionStream::new(seed, scenario, schema.clone(), num_connections, max_interactions).collect(); + TableWorkloadCase { + seed, + scenario, + num_connections, + schema, + interactions, + } +} + +pub fn generate_case(seed: DstSeed, scenario: TableScenarioId) -> TableWorkloadCase { + let mut rng = seed.fork(T::connection_seed_discriminator()).rng(); + materialize_case::(seed, scenario, default_target_ops(&mut rng)) +} + +pub fn run_case_detailed( + case: &TableWorkloadCase, +) -> Result, TableWorkloadExecutionFailure> { + let mut trace = Trace::default(); + for interaction in &case.interactions { + trace.push(TableWorkloadEvent::Executed(interaction.clone())); + } + + let outcome = execute_interactions( + &case.scenario, + &case.schema, + case.num_connections, + case.interactions.clone(), + T::build_engine, + )?; + + Ok(RunRecord { + subsystem: T::target_name(), + seed: case.seed, + case: case.clone(), + trace: Some(trace), + outcome, + }) +} + +pub fn run_generated_with_config_and_scenario( + seed: DstSeed, + scenario: TableScenarioId, + config: RunConfig, +) -> anyhow::Result { + run_generated_with_engine(seed, scenario, config, T::build_engine) +} + +pub fn save_case(path: impl AsRef, case: &TableWorkloadCase) -> anyhow::Result<()> { + save_json(path, case) +} + +pub fn load_case(path: impl AsRef) -> anyhow::Result { + load_json(path) +} + +pub fn failure_reason(case: &TableWorkloadCase) -> anyhow::Result { + match run_case_detailed::(case) { + Ok(_) => anyhow::bail!("case did not fail"), + Err(failure) => Ok(failure.reason), + } +} + +pub fn shrink_failure( + case: &TableWorkloadCase, + failure: &TableWorkloadExecutionFailure, +) -> 
anyhow::Result { + shrink_by_removing( + case, + failure, + |case| { + let mut shrunk = case.clone(); + shrunk.interactions.truncate(failure.step_index.saturating_add(1)); + shrunk + }, + |case| case.interactions.len(), + |case, idx| { + let interaction = case.interactions.get(idx)?; + if !T::can_remove_interaction(interaction) { + return None; + } + let mut interactions = case.interactions.clone(); + interactions.remove(idx); + Some(TableWorkloadCase { + seed: case.seed, + scenario: case.scenario, + num_connections: case.num_connections, + schema: case.schema.clone(), + interactions, + }) + }, + |case| match run_case_detailed::(case) { + Ok(_) => anyhow::bail!("case did not fail"), + Err(failure) => Ok(failure), + }, + |expected, candidate| expected.reason == candidate.reason, + ) +} diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index 3dac4e35aae..df038999af5 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -1,3 +1,5 @@ //! Concrete simulation targets. pub mod datastore; +pub mod harness; +pub mod relational_db; diff --git a/crates/dst/src/targets/relational_db.rs b/crates/dst/src/targets/relational_db.rs new file mode 100644 index 00000000000..7fa4bcb8a4d --- /dev/null +++ b/crates/dst/src/targets/relational_db.rs @@ -0,0 +1,532 @@ +//! Basic RelationalDB simulator target using the shared table workload. 
+ +use std::path::Path; + +use spacetimedb_core::{ + db::relational_db::{MutTx as RelMutTx, RelationalDB, Tx as RelTx}, + messages::control_db::HostType, +}; +use spacetimedb_datastore::{ + execution_context::Workload, + traits::{IsolationLevel, Program}, +}; +use spacetimedb_durability::EmptyHistory; +use spacetimedb_lib::{ + db::auth::{StAccess, StTableType}, + Identity, +}; +use spacetimedb_primitives::TableId; +use spacetimedb_sats::AlgebraicValue; +use spacetimedb_schema::{ + def::BTreeAlgorithm, + schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, + table_name::TableName, +}; +use spacetimedb_table::page_pool::PagePool; + +use crate::{ + bugbase::{load_json, save_json, BugArtifact}, + config::RunConfig, + schema::{SchemaPlan, SimRow}, + seed::DstSeed, + subsystem::{DstSubsystem, RunRecord}, + targets::harness::{self, TableTargetHarness}, + workload::table_ops::{ + ConnectionWriteState, TableProperty, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, + TableWorkloadEvent, TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, + }, +}; + +pub type RelationalDbScenario = TableScenarioId; +pub type RelationalDbSimulatorCase = TableWorkloadCase; +pub type RelationalDbInteraction = TableWorkloadInteraction; +pub type RelationalDbSimulatorEvent = TableWorkloadEvent; +pub type RelationalDbSimulatorOutcome = TableWorkloadOutcome; +pub type RelationalDbExecutionFailure = TableWorkloadExecutionFailure; +pub type RelationalDbBugArtifact = BugArtifact; +pub type RelationalDbRunConfig = RunConfig; + +/// DST subsystem wrapper around the relational-db simulator target. 
+pub struct RelationalDbSimulatorSubsystem; + +struct RelationalDbTarget; + +impl TableTargetHarness for RelationalDbTarget { + type Engine = RelationalDbEngine; + + fn target_name() -> &'static str { + RelationalDbSimulatorSubsystem::name() + } + + fn connection_seed_discriminator() -> u64 { + 31 + } + + fn build_engine(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { + RelationalDbEngine::new(schema, num_connections) + } +} + +impl DstSubsystem for RelationalDbSimulatorSubsystem { + type Case = RelationalDbSimulatorCase; + type Event = RelationalDbSimulatorEvent; + type Outcome = RelationalDbSimulatorOutcome; + + fn name() -> &'static str { + "relational-db-simulator" + } + + fn generate_case(seed: DstSeed) -> Self::Case { + harness::generate_case::(seed, RelationalDbScenario::RandomCrud) + } + + fn run_case(case: &Self::Case) -> anyhow::Result> { + harness::run_case_detailed::(case).map_err(|failure| { + anyhow::anyhow!( + "relational db simulator failed at step {}: {}", + failure.step_index, + failure.reason + ) + }) + } +} + +pub fn generate_case(seed: DstSeed) -> RelationalDbSimulatorCase { + generate_case_for_scenario(seed, RelationalDbScenario::RandomCrud) +} + +pub fn generate_case_for_scenario(seed: DstSeed, scenario: RelationalDbScenario) -> RelationalDbSimulatorCase { + harness::generate_case::(seed, scenario) +} + +pub fn materialize_case( + seed: DstSeed, + scenario: RelationalDbScenario, + max_interactions: usize, +) -> RelationalDbSimulatorCase { + harness::materialize_case::(seed, scenario, max_interactions) +} + +pub fn run_case_detailed( + case: &RelationalDbSimulatorCase, +) -> Result< + RunRecord, + RelationalDbExecutionFailure, +> { + harness::run_case_detailed::(case) +} + +pub fn run_generated_stream(seed: DstSeed, max_interactions: usize) -> anyhow::Result { + run_generated_with_config(seed, RelationalDbRunConfig::with_max_interactions(max_interactions)) +} + +pub fn run_generated_with_config( + seed: DstSeed, + config: 
RelationalDbRunConfig, +) -> anyhow::Result { + run_generated_with_config_and_scenario(seed, RelationalDbScenario::RandomCrud, config) +} + +pub fn run_generated_with_config_and_scenario( + seed: DstSeed, + scenario: RelationalDbScenario, + config: RelationalDbRunConfig, +) -> anyhow::Result { + harness::run_generated_with_config_and_scenario::(seed, scenario, config) +} + +pub fn save_case(path: impl AsRef, case: &RelationalDbSimulatorCase) -> anyhow::Result<()> { + harness::save_case(path, case) +} + +pub fn load_case(path: impl AsRef) -> anyhow::Result { + harness::load_case(path) +} + +pub fn save_bug_artifact(path: impl AsRef, artifact: &RelationalDbBugArtifact) -> anyhow::Result<()> { + save_json(path, artifact) +} + +pub fn load_bug_artifact(path: impl AsRef) -> anyhow::Result { + load_json(path) +} + +pub fn shrink_failure( + case: &RelationalDbSimulatorCase, + failure: &RelationalDbExecutionFailure, +) -> anyhow::Result { + harness::shrink_failure::(case, failure) +} + +/// Concrete `RelationalDB` execution harness for the shared table workload. 
+struct RelationalDbEngine { + db: RelationalDB, + table_ids: Vec, + execution: ConnectionWriteState, +} + +impl RelationalDbEngine { + fn new(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { + let db = bootstrap_relational_db()?; + let table_ids = install_schema(&db, schema)?; + Ok(Self { + db, + table_ids, + execution: ConnectionWriteState::new(num_connections), + }) + } + + fn with_mut_tx( + &mut self, + conn: usize, + table: usize, + mut f: impl FnMut(&RelationalDB, TableId, &mut RelMutTx) -> Result<(), String>, + ) -> Result<(), String> { + let table_id = *self + .table_ids + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + self.execution.ensure_known_connection(conn)?; + let slot = &mut self.execution.tx_by_connection[conn]; + + match slot { + Some(tx) => f(&self.db, table_id, tx), + None => { + if let Some(owner) = self.execution.active_writer { + return Err(format!( + "connection {conn} cannot auto-commit write while connection {owner} owns lock" + )); + } + let mut tx = self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + self.execution.active_writer = Some(conn); + f(&self.db, table_id, &mut tx)?; + self.db + .commit_tx(tx) + .map_err(|err| format!("auto-commit failed on connection {conn}: {err}"))?; + self.execution.active_writer = None; + Ok(()) + } + } + } + + fn fresh_lookup(&self, table_id: TableId, id: u64) -> anyhow::Result> { + let tx = self.db.begin_tx(Workload::ForTests); + let result = self + .db + .iter_by_col_eq(&tx, table_id, 0u16, &AlgebraicValue::U64(id))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .find(|row| row.id() == Some(id)); + let _ = self.db.release_tx(tx); + Ok(result) + } + + fn collect_rows_for_table(&self, table: usize) -> anyhow::Result> { + let table_id = *self + .table_ids + .get(table) + .ok_or_else(|| anyhow::anyhow!("table {table} out of range"))?; + let tx = self.db.begin_tx(Workload::ForTests); + let mut rows = self + .db + .iter(&tx, table_id)? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + let _ = self.db.release_tx(tx); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + } +} + +impl TableWorkloadEngine for RelationalDbEngine { + fn execute(&mut self, interaction: &RelationalDbInteraction) -> Result<(), String> { + match interaction { + RelationalDbInteraction::BeginTx { conn } => { + self.execution.ensure_known_connection(*conn)?; + if self.execution.tx_by_connection[*conn].is_some() { + return Err(format!("connection {conn} already has open transaction")); + } + if let Some(owner) = self.execution.active_writer { + return Err(format!( + "connection {conn} cannot begin write transaction while connection {owner} owns lock" + )); + } + self.execution.tx_by_connection[*conn] = + Some(self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests)); + self.execution.active_writer = Some(*conn); + } + RelationalDbInteraction::CommitTx { conn } => { + self.execution.ensure_writer_owner(*conn, "commit")?; + let tx = self.execution.tx_by_connection[*conn] + .take() + .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; + self.db + .commit_tx(tx) + .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; + self.execution.active_writer = None; + } + RelationalDbInteraction::RollbackTx { conn } => { + self.execution.ensure_writer_owner(*conn, "rollback")?; + let tx = self.execution.tx_by_connection[*conn] + .take() + .ok_or_else(|| format!("connection {conn} has no 
transaction to rollback"))?; + let _ = self.db.rollback_mut_tx(tx); + self.execution.active_writer = None; + } + RelationalDbInteraction::Insert { conn, table, row } => { + self.with_mut_tx(*conn, *table, |db, table_id, tx| { + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + db.insert(tx, table_id, &bsatn) + .map_err(|err| format!("insert failed: {err}"))?; + Ok(()) + })?; + } + RelationalDbInteraction::Delete { conn, table, row } => { + self.with_mut_tx(*conn, *table, |db, table_id, tx| { + let deleted = db.delete_by_rel(tx, table_id, [row.to_product_value()]); + if deleted != 1 { + return Err(format!("delete expected 1 row, got {deleted}")); + } + Ok(()) + })?; + } + RelationalDbInteraction::Check(TableProperty::VisibleInConnection { conn, table, row }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { + self.db + .iter_by_col_eq_mut(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("in-tx lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .any(|candidate| candidate == *row) + } else { + self.fresh_lookup(table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}"))? 
+ == Some(row.clone()) + }; + if !found { + return Err(format!("row not visible in connection after write: {row:?}")); + } + } + RelationalDbInteraction::Check(TableProperty::MissingInConnection { conn, table, row }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { + self.db + .iter_by_col_eq_mut(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("in-tx lookup failed: {err}"))? + .next() + .is_some() + } else { + self.fresh_lookup(table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}"))? + .is_some() + }; + if found { + return Err(format!("row still visible in connection after delete: {row:?}")); + } + } + RelationalDbInteraction::Check(TableProperty::VisibleFresh { table, row }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = self + .fresh_lookup(table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}"))?; + if found != Some(row.clone()) { + return Err(format!("fresh lookup mismatch: expected={row:?} actual={found:?}")); + } + } + RelationalDbInteraction::Check(TableProperty::MissingFresh { table, row }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + if self + .fresh_lookup(table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}"))? 
+ .is_some() + { + return Err(format!("fresh lookup still found deleted row: {row:?}")); + } + } + RelationalDbInteraction::Check(TableProperty::RowCountFresh { table, expected }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let tx: RelTx = self.db.begin_tx(Workload::ForTests); + let actual = self + .db + .iter(&tx, table_id) + .map_err(|err| format!("row count scan failed: {err}"))? + .count() as u64; + let _ = self.db.release_tx(tx); + if actual != *expected { + return Err(format!("row count mismatch: expected={expected} actual={actual}")); + } + } + RelationalDbInteraction::Check(TableProperty::TablesMatchFresh { left, right }) => { + let left_rows = self + .collect_rows_for_table(*left) + .map_err(|err| format!("left table collect failed: {err}"))?; + let right_rows = self + .collect_rows_for_table(*right) + .map_err(|err| format!("right table collect failed: {err}"))?; + if left_rows != right_rows { + return Err(format!( + "fresh table mismatch: left_table={left} right_table={right} left={left_rows:?} right={right_rows:?}" + )); + } + } + } + + Ok(()) + } + + fn collect_outcome(&mut self) -> anyhow::Result { + let tx = self.db.begin_tx(Workload::ForTests); + let mut final_rows = Vec::with_capacity(self.table_ids.len()); + let mut final_row_counts = Vec::with_capacity(self.table_ids.len()); + + for &table_id in &self.table_ids { + let mut rows = self + .db + .iter(&tx, table_id)? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + final_row_counts.push(rows.len() as u64); + final_rows.push(rows); + } + let _ = self.db.release_tx(tx); + + Ok(RelationalDbSimulatorOutcome { + final_row_counts, + final_rows, + }) + } + + fn finish(&mut self) { + for tx in &mut self.execution.tx_by_connection { + if let Some(tx) = tx.take() { + let _ = self.db.rollback_mut_tx(tx); + } + } + self.execution.active_writer = None; + } +} + +fn bootstrap_relational_db() -> anyhow::Result { + let (db, connected_clients) = RelationalDB::open( + Identity::ZERO, + Identity::ZERO, + EmptyHistory::new(), + None, + None, + PagePool::new_for_test(), + )?; + assert_eq!(connected_clients.len(), 0); + db.with_auto_commit(Workload::Internal, |tx| { + db.set_initialized(tx, Program::empty(HostType::Wasm.into())) + })?; + Ok(db) +} + +fn install_schema(db: &RelationalDB, schema: &SchemaPlan) -> anyhow::Result> { + let mut tx = db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + let mut table_ids = Vec::with_capacity(schema.tables.len()); + + for table in &schema.tables { + let columns = table + .columns + .iter() + .enumerate() + .map(|(idx, col)| ColumnSchema::for_test(idx as u16, &col.name, col.ty.clone())) + .collect::>(); + + let mut indexes = vec![IndexSchema::for_test( + format!("{}_id_idx", table.name), + BTreeAlgorithm::from(0), + )]; + if let Some(col) = table.secondary_index_col { + indexes.push(IndexSchema::for_test( + format!("{}_c{col}_idx", table.name), + BTreeAlgorithm::from(col), + )); + } + let constraints = vec![ConstraintSchema::unique_for_test( + format!("{}_id_unique", table.name), + 0, + )]; + + let table_id = db.create_table( + &mut tx, + TableSchema::new( + TableId::SENTINEL, + TableName::for_test(&table.name), + None, + columns, + indexes, + constraints, + vec![], + StTableType::User, + StAccess::Public, + None, + Some(0.into()), + false, + 
None, + ), + )?; + table_ids.push(table_id); + } + + db.commit_tx(tx)?; + Ok(table_ids) +} + +#[cfg(test)] +mod tests { + use std::sync::{Mutex, OnceLock}; + + use pretty_assertions::assert_eq; + + use crate::{ + runner::{rerun_case, run_generated}, + seed::DstSeed, + }; + + use super::{generate_case_for_scenario, RelationalDbScenario, RelationalDbSimulatorSubsystem}; + + fn test_lock() -> &'static Mutex<()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) + } + + #[test] + fn generated_case_replays_identically() { + let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); + let artifact = run_generated::(DstSeed(13)).expect("run relational db case"); + let replayed = rerun_case::(&artifact).expect("rerun relational db case"); + assert_eq!(artifact.case, replayed.case); + assert_eq!(artifact.trace, replayed.trace); + assert_eq!(artifact.outcome, replayed.outcome); + } + + #[test] + fn banking_generation_uses_fixed_schema() { + let case = generate_case_for_scenario(DstSeed(4242), RelationalDbScenario::Banking); + assert_eq!(case.scenario, RelationalDbScenario::Banking); + assert_eq!(case.schema.tables.len(), 2); + assert_eq!(case.schema.tables[0].name, "debit_accounts"); + assert_eq!(case.schema.tables[1].name, "credit_accounts"); + } +} diff --git a/crates/dst/src/trace.rs b/crates/dst/src/trace.rs index ccb146b6774..8251331ccf3 100644 --- a/crates/dst/src/trace.rs +++ b/crates/dst/src/trace.rs @@ -7,10 +7,15 @@ /// One event plus optional metadata captured during execution. #[derive(Clone, Debug, Eq, PartialEq)] pub struct StampedEvent { + /// Monotonic step number within the trace. pub step_id: u64, + /// Optional logical time supplied by a scheduler-style simulation. pub logical_time: Option, + /// Optional actor identity for actor-driven simulations. pub actor_id: Option, + /// Optional resource identity such as a lock, table, or replica id. pub resource_id: Option, + /// Target-specific event payload. 
pub event: E, } diff --git a/crates/dst/src/workload/mod.rs b/crates/dst/src/workload/mod.rs new file mode 100644 index 00000000000..3de504a5324 --- /dev/null +++ b/crates/dst/src/workload/mod.rs @@ -0,0 +1,3 @@ +//! Shared workload generators reused by multiple DST targets. + +pub mod table_ops; diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs new file mode 100644 index 00000000000..a4bed7d16fe --- /dev/null +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -0,0 +1,172 @@ +use std::collections::VecDeque; + +use crate::{ + schema::SchemaPlan, + seed::{DstRng, DstSeed}, +}; + +use super::{model::GenerationModel, TableScenario, TableWorkloadInteraction}; + +/// Streaming planner for table-oriented workloads. +/// +/// The stream keeps only generator state plus a small pending queue, so long +/// duration runs do not need to materialize the full interaction list in +/// memory up front. +#[derive(Clone, Debug)] +pub struct InteractionStream { + rng: DstRng, + scenario: S, + model: GenerationModel, + num_connections: usize, + target_interactions: usize, + emitted: usize, + finalize_conn: usize, + pending: VecDeque, + finished: bool, +} + +pub struct ScenarioPlanner<'a> { + rng: &'a mut DstRng, + model: &'a mut GenerationModel, + pending: &'a mut VecDeque, +} + +impl<'a> ScenarioPlanner<'a> { + pub fn choose_index(&mut self, len: usize) -> usize { + self.rng.index(len) + } + + pub fn choose_table(&mut self) -> usize { + self.rng.index(self.model.schema.tables.len()) + } + + pub fn roll_percent(&mut self, percent: usize) -> bool { + self.rng.index(100) < percent + } + + pub fn maybe_control_tx(&mut self, conn: usize, begin_pct: usize, commit_pct: usize, rollback_pct: usize) -> bool { + if !self.model.connections[conn].in_tx && self.model.active_writer().is_none() && self.roll_percent(begin_pct) { + self.model.begin_tx(conn); + self.pending.push_back(TableWorkloadInteraction::BeginTx { conn }); + return 
true; + } + + if self.model.connections[conn].in_tx && self.roll_percent(commit_pct) { + let followups = self.model.commit(conn); + self.pending.push_back(TableWorkloadInteraction::CommitTx { conn }); + self.pending.extend(followups); + return true; + } + + if self.model.connections[conn].in_tx && self.roll_percent(rollback_pct) { + let followups = self.model.rollback(conn); + self.pending.push_back(TableWorkloadInteraction::RollbackTx { conn }); + self.pending.extend(followups); + return true; + } + + false + } + + pub fn visible_rows(&self, conn: usize, table: usize) -> Vec { + self.model.visible_rows(conn, table) + } + + pub fn make_row(&mut self, table: usize) -> crate::schema::SimRow { + self.model.make_row(self.rng, table) + } + + pub fn insert(&mut self, conn: usize, table: usize, row: crate::schema::SimRow) { + self.model.insert(conn, table, row); + } + + pub fn delete(&mut self, conn: usize, table: usize, row: crate::schema::SimRow) { + self.model.delete(conn, table, row); + } + + pub fn last_inserted_row(&self, conn: usize) -> Option { + self.model.last_inserted_row(conn) + } + + pub fn in_tx(&self, conn: usize) -> bool { + self.model.connections[conn].in_tx + } + + pub fn push_interaction(&mut self, interaction: TableWorkloadInteraction) { + self.pending.push_back(interaction); + } +} + +impl InteractionStream { + pub fn new( + seed: DstSeed, + scenario: S, + schema: SchemaPlan, + num_connections: usize, + target_interactions: usize, + ) -> Self { + let scenario_commit_properties = scenario.commit_properties(); + Self { + rng: seed.fork(17).rng(), + scenario, + model: GenerationModel::new(&schema, num_connections, seed, scenario_commit_properties), + num_connections, + target_interactions, + emitted: 0, + finalize_conn: 0, + pending: VecDeque::new(), + finished: false, + } + } + + pub fn request_finish(&mut self) { + self.target_interactions = self.emitted; + } + + fn fill_pending(&mut self) { + if self.emitted >= self.target_interactions { + while 
self.finalize_conn < self.num_connections { + let conn = self.finalize_conn; + self.finalize_conn += 1; + if self.model.connections[conn].in_tx { + let followups = self.model.commit(conn); + self.pending.push_back(TableWorkloadInteraction::CommitTx { conn }); + self.pending.extend(followups); + return; + } + } + self.finished = true; + return; + } + + let conn = self + .model + .active_writer() + .unwrap_or_else(|| self.rng.index(self.num_connections)); + let mut planner = ScenarioPlanner { + rng: &mut self.rng, + model: &mut self.model, + pending: &mut self.pending, + }; + self.scenario.fill_pending(&mut planner, conn); + } +} + +impl Iterator for InteractionStream { + type Item = TableWorkloadInteraction; + + fn next(&mut self) -> Option { + loop { + if let Some(interaction) = self.pending.pop_front() { + self.emitted += 1; + return Some(interaction); + } + + if self.finished { + return None; + } + + self.fill_pending(); + } + } +} diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs new file mode 100644 index 00000000000..731fd24ba46 --- /dev/null +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -0,0 +1,17 @@ +//! Shared transactional table workload used by datastore-like targets. 
+ +mod generation; +mod model; +mod properties; +mod runner; +mod scenarios; +mod types; + +pub use generation::{InteractionStream, ScenarioPlanner}; +pub use properties::{followup_properties_after_commit, property_interaction, TableProperty}; +pub use runner::{execute_interactions, run_generated_with_engine}; +pub use scenarios::{default_target_ops, BankingScenario, RandomCrudScenario, TableScenarioId}; +pub use types::{ + ConnectionWriteState, TableScenario, TableWorkloadCase, TableWorkloadEngine, TableWorkloadEvent, + TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, +}; diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs new file mode 100644 index 00000000000..26700cc6663 --- /dev/null +++ b/crates/dst/src/workload/table_ops/model.rs @@ -0,0 +1,253 @@ +use std::collections::BTreeSet; + +use spacetimedb_sats::AlgebraicValue; + +use crate::{ + schema::{generate_value_for_type, SchemaPlan, SimRow}, + seed::{DstRng, DstSeed}, +}; + +use super::{followup_properties_after_commit, property_interaction, TableProperty, TableWorkloadInteraction}; + +/// Generator-side model of committed rows plus per-connection pending writes. +/// +/// This model is used only while producing interactions. It lets the planner +/// pick valid deletes, synthesize visibility checks, and enforce the +/// single-writer discipline before the real target executes anything. 
+#[derive(Clone, Debug)] +pub(crate) struct GenerationModel { + pub(crate) schema: SchemaPlan, + pub(crate) connections: Vec, + committed: Vec>, + next_ids: Vec, + active_writer: Option, + scenario_commit_properties: Vec, +} + +#[derive(Clone, Debug, Default)] +pub(crate) struct PendingConnection { + pub(crate) in_tx: bool, + staged_inserts: Vec<(usize, SimRow)>, + staged_deletes: Vec<(usize, SimRow)>, + last_auto_committed_insert: Option, +} + +impl GenerationModel { + pub(crate) fn new( + schema: &SchemaPlan, + num_connections: usize, + seed: DstSeed, + scenario_commit_properties: Vec, + ) -> Self { + Self { + schema: schema.clone(), + connections: vec![PendingConnection::default(); num_connections], + committed: vec![Vec::new(); schema.tables.len()], + next_ids: (0..schema.tables.len()) + .map(|idx| seed.fork(idx as u64 + 100).0) + .collect(), + active_writer: None, + scenario_commit_properties, + } + } + + pub(crate) fn make_row(&mut self, rng: &mut DstRng, table: usize) -> SimRow { + let table_plan = &self.schema.tables[table]; + let id = self.next_ids[table]; + self.next_ids[table] = self.next_ids[table].wrapping_add(1).max(1); + let mut values = vec![AlgebraicValue::U64(id)]; + for (idx, col) in table_plan.columns.iter().enumerate().skip(1) { + values.push(generate_value_for_type(rng, &col.ty, idx)); + } + SimRow { values } + } + + pub(crate) fn visible_rows(&self, conn: usize, table: usize) -> Vec { + let mut rows = self.committed[table].clone(); + let pending = &self.connections[conn]; + for (pending_table, row) in &pending.staged_deletes { + if *pending_table == table { + rows.retain(|candidate| candidate != row); + } + } + for (pending_table, row) in &pending.staged_inserts { + if *pending_table == table { + rows.push(row.clone()); + } + } + rows + } + + pub(crate) fn active_writer(&self) -> Option { + self.active_writer + } + + pub(crate) fn begin_tx(&mut self, conn: usize) { + assert!(self.active_writer.is_none(), "single writer already active"); + let 
pending = &mut self.connections[conn]; + assert!(!pending.in_tx, "connection already in transaction"); + pending.in_tx = true; + self.active_writer = Some(conn); + } + + pub(crate) fn insert(&mut self, conn: usize, table: usize, row: SimRow) { + let pending = &mut self.connections[conn]; + if pending.in_tx { + pending.staged_inserts.push((table, row)); + } else { + self.committed[table].push(row.clone()); + pending.last_auto_committed_insert = Some(row); + } + } + + pub(crate) fn last_inserted_row(&self, conn: usize) -> Option { + self.connections[conn].last_auto_committed_insert.clone() + } + + pub(crate) fn delete(&mut self, conn: usize, table: usize, row: SimRow) { + let pending = &mut self.connections[conn]; + if pending.in_tx { + pending + .staged_inserts + .retain(|(pending_table, candidate)| !(*pending_table == table && *candidate == row)); + pending.staged_deletes.push((table, row)); + } else { + self.committed[table].retain(|candidate| *candidate != row); + } + } + + pub(crate) fn commit(&mut self, conn: usize) -> Vec { + let pending = &mut self.connections[conn]; + let inserts = std::mem::take(&mut pending.staged_inserts); + let deletes = std::mem::take(&mut pending.staged_deletes); + pending.in_tx = false; + self.active_writer = None; + + for (table, row) in &deletes { + self.committed[*table].retain(|candidate| candidate != row); + } + for (table, row) in &inserts { + self.committed[*table].push(row.clone()); + } + + followup_properties_after_commit(self.scenario_commit_properties.clone(), inserts, deletes) + } + + pub(crate) fn rollback(&mut self, conn: usize) -> Vec { + let pending = &mut self.connections[conn]; + let touched_tables = pending + .staged_inserts + .iter() + .chain(pending.staged_deletes.iter()) + .map(|(table, _)| *table) + .collect::>(); + pending.staged_inserts.clear(); + pending.staged_deletes.clear(); + pending.in_tx = false; + self.active_writer = None; + let mut followups = touched_tables + .into_iter() + .map(|table| { + 
property_interaction(TableProperty::RowCountFresh { + table, + expected: self.committed[table].len() as u64, + }) + }) + .collect::>(); + followups.extend(self.scenario_commit_properties.clone()); + followups + } +} + +/// Replay model for the expected final committed state of a table workload. +/// +/// The shared runner applies every interaction here in parallel with the real +/// target execution, then compares the collected target outcome against this +/// model at the end of the run. +#[derive(Clone, Debug)] +pub struct ExpectedModel { + committed: Vec>, + connections: Vec, + active_writer: Option, +} + +#[derive(Clone, Debug, Default)] +struct ExpectedConnection { + in_tx: bool, + staged_inserts: Vec<(usize, SimRow)>, + staged_deletes: Vec<(usize, SimRow)>, +} + +impl ExpectedModel { + pub fn new(table_count: usize, connection_count: usize) -> Self { + Self { + committed: vec![Vec::new(); table_count], + connections: vec![ExpectedConnection::default(); connection_count], + active_writer: None, + } + } + + pub fn apply(&mut self, interaction: &TableWorkloadInteraction) { + match interaction { + TableWorkloadInteraction::BeginTx { conn } => { + assert!( + self.active_writer.is_none(), + "multiple concurrent writers in expected model" + ); + self.connections[*conn].in_tx = true; + self.active_writer = Some(*conn); + } + TableWorkloadInteraction::CommitTx { conn } => { + assert_eq!(self.active_writer, Some(*conn), "commit by non-owner in expected model"); + let state = &mut self.connections[*conn]; + for (table, row) in state.staged_deletes.drain(..) { + self.committed[table].retain(|candidate| *candidate != row); + } + for (table, row) in state.staged_inserts.drain(..) 
{ + self.committed[table].push(row); + } + state.in_tx = false; + self.active_writer = None; + } + TableWorkloadInteraction::RollbackTx { conn } => { + assert_eq!( + self.active_writer, + Some(*conn), + "rollback by non-owner in expected model" + ); + let state = &mut self.connections[*conn]; + state.staged_inserts.clear(); + state.staged_deletes.clear(); + state.in_tx = false; + self.active_writer = None; + } + TableWorkloadInteraction::Insert { conn, table, row } => { + let state = &mut self.connections[*conn]; + if state.in_tx { + state.staged_inserts.push((*table, row.clone())); + } else { + self.committed[*table].push(row.clone()); + } + } + TableWorkloadInteraction::Delete { conn, table, row } => { + let state = &mut self.connections[*conn]; + if state.in_tx { + state + .staged_inserts + .retain(|(pending_table, candidate)| !(*pending_table == *table && *candidate == *row)); + state.staged_deletes.push((*table, row.clone())); + } else { + self.committed[*table].retain(|candidate| *candidate != *row); + } + } + TableWorkloadInteraction::Check(_) => {} + } + } + + pub fn committed_rows(mut self) -> Vec> { + for table_rows in &mut self.committed { + table_rows.sort_by_key(|row| row.id().unwrap_or_default()); + } + self.committed + } +} diff --git a/crates/dst/src/workload/table_ops/properties.rs b/crates/dst/src/workload/table_ops/properties.rs new file mode 100644 index 00000000000..43eda15d5f6 --- /dev/null +++ b/crates/dst/src/workload/table_ops/properties.rs @@ -0,0 +1,35 @@ +use serde::{Deserialize, Serialize}; + +use crate::schema::SimRow; + +use super::TableWorkloadInteraction; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum TableProperty { + VisibleInConnection { conn: usize, table: usize, row: SimRow }, + MissingInConnection { conn: usize, table: usize, row: SimRow }, + VisibleFresh { table: usize, row: SimRow }, + MissingFresh { table: usize, row: SimRow }, + RowCountFresh { table: usize, expected: u64 }, + TablesMatchFresh 
{ left: usize, right: usize }, +} + +pub fn property_interaction(property: TableProperty) -> TableWorkloadInteraction { + TableWorkloadInteraction::Check(property) +} + +pub fn followup_properties_after_commit( + scenario_commit_properties: Vec, + inserts: Vec<(usize, SimRow)>, + deletes: Vec<(usize, SimRow)>, +) -> Vec { + let mut followups = Vec::new(); + for (table, row) in inserts { + followups.push(property_interaction(TableProperty::VisibleFresh { table, row })); + } + for (table, row) in deletes { + followups.push(property_interaction(TableProperty::MissingFresh { table, row })); + } + followups.extend(scenario_commit_properties); + followups +} diff --git a/crates/dst/src/workload/table_ops/runner.rs b/crates/dst/src/workload/table_ops/runner.rs new file mode 100644 index 00000000000..a8b594e0a9d --- /dev/null +++ b/crates/dst/src/workload/table_ops/runner.rs @@ -0,0 +1,118 @@ +use std::time::Instant; + +use crate::{config::RunConfig, schema::SchemaPlan, seed::DstSeed}; + +use super::{ + model::ExpectedModel, InteractionStream, TableProperty, TableScenario, TableWorkloadEngine, + TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, +}; + +pub fn execute_interactions( + scenario: &S, + schema: &SchemaPlan, + num_connections: usize, + interactions: I, + make_engine: impl FnOnce(&SchemaPlan, usize) -> anyhow::Result, +) -> Result +where + S: TableScenario, + E: TableWorkloadEngine, + I: IntoIterator, +{ + let mut engine = + make_engine(schema, num_connections).map_err(|err| failure_without_step(format!("bootstrap failed: {err}")))?; + let mut expected = ExpectedModel::new(schema.tables.len(), num_connections); + + for (step_index, interaction) in interactions.into_iter().enumerate() { + engine + .execute(&interaction) + .map_err(|reason| TableWorkloadExecutionFailure { + step_index, + reason, + interaction: interaction.clone(), + })?; + expected.apply(&interaction); + } + + engine.finish(); + let outcome = engine + .collect_outcome() 
+ .map_err(|err| failure_without_step(format!("collect outcome failed: {err}")))?; + let expected_rows = expected.committed_rows(); + if outcome.final_rows != expected_rows { + return Err(failure_without_step(format!( + "final datastore state mismatch: expected={expected_rows:?} actual={:?}", + outcome.final_rows + ))); + } + + scenario + .validate_outcome(schema, &outcome) + .map_err(|err| failure_without_step(format!("scenario invariant failed: {err}")))?; + + Ok(outcome) +} + +pub fn run_generated_with_engine( + seed: DstSeed, + scenario: S, + config: RunConfig, + make_engine: impl FnOnce(&SchemaPlan, usize) -> anyhow::Result, +) -> anyhow::Result +where + S: TableScenario, + E: TableWorkloadEngine, +{ + let mut rng = seed.fork(17).rng(); + let num_connections = rng.index(3) + 1; + let schema = scenario.generate_schema(&mut rng); + let mut stream = InteractionStream::new( + seed, + scenario.clone(), + schema.clone(), + num_connections, + config.max_interactions_or_default(usize::MAX), + ); + let mut engine = make_engine(&schema, num_connections)?; + let mut expected = ExpectedModel::new(schema.tables.len(), num_connections); + let deadline = config.deadline(); + + let mut step_index = 0usize; + loop { + if deadline.is_some_and(|deadline| Instant::now() >= deadline) { + stream.request_finish(); + } + + let Some(interaction) = stream.next() else { + break; + }; + engine + .execute(&interaction) + .map_err(|reason| anyhow::anyhow!("workload failed at step {step_index}: {reason}"))?; + expected.apply(&interaction); + step_index = step_index.saturating_add(1); + } + + engine.finish(); + let outcome = engine.collect_outcome()?; + let expected_rows = expected.committed_rows(); + if outcome.final_rows != expected_rows { + anyhow::bail!( + "final datastore state mismatch: expected={expected_rows:?} actual={:?}", + outcome.final_rows + ); + } + scenario.validate_outcome(&schema, &outcome)?; + Ok(outcome) +} + +fn failure_without_step(reason: String) -> 
TableWorkloadExecutionFailure { + TableWorkloadExecutionFailure { + step_index: usize::MAX, + reason, + interaction: TableWorkloadInteraction::Check(TableProperty::RowCountFresh { + table: usize::MAX, + expected: 0, + }), + } +} diff --git a/crates/dst/src/workload/table_ops/scenarios/banking.rs b/crates/dst/src/workload/table_ops/scenarios/banking.rs new file mode 100644 index 00000000000..a94804ea05e --- /dev/null +++ b/crates/dst/src/workload/table_ops/scenarios/banking.rs @@ -0,0 +1,160 @@ +use spacetimedb_sats::AlgebraicType; + +use crate::schema::{ColumnPlan, SchemaPlan, TablePlan}; + +use super::super::{ + generation::ScenarioPlanner, + properties::{property_interaction, TableProperty}, + TableWorkloadInteraction, TableWorkloadOutcome, +}; + +pub fn generate_schema() -> SchemaPlan { + SchemaPlan { + tables: vec![ + TablePlan { + name: "debit_accounts".into(), + columns: vec![ + ColumnPlan { + name: "id".into(), + ty: AlgebraicType::U64, + }, + ColumnPlan { + name: "balance".into(), + ty: AlgebraicType::U64, + }, + ], + secondary_index_col: Some(1), + }, + TablePlan { + name: "credit_accounts".into(), + columns: vec![ + ColumnPlan { + name: "id".into(), + ty: AlgebraicType::U64, + }, + ColumnPlan { + name: "balance".into(), + ty: AlgebraicType::U64, + }, + ], + secondary_index_col: Some(1), + }, + ], + } +} + +pub fn validate_outcome(schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + let debit_idx = schema + .tables + .iter() + .position(|table| table.name == "debit_accounts") + .ok_or_else(|| anyhow::anyhow!("missing debit_accounts table"))?; + let credit_idx = schema + .tables + .iter() + .position(|table| table.name == "credit_accounts") + .ok_or_else(|| anyhow::anyhow!("missing credit_accounts table"))?; + + let debit_rows = outcome + .final_rows + .get(debit_idx) + .ok_or_else(|| anyhow::anyhow!("missing debit_accounts rows"))?; + let credit_rows = outcome + .final_rows + .get(credit_idx) + .ok_or_else(|| 
anyhow::anyhow!("missing credit_accounts rows"))?; + + if debit_rows != credit_rows { + anyhow::bail!("banking tables diverged: debit={debit_rows:?} credit={credit_rows:?}"); + } + Ok(()) +} + +pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: usize) { + if planner.maybe_control_tx(conn, 25, 20, 10) { + return; + } + + let debit_rows = planner.visible_rows(conn, 0); + let choose_insert = debit_rows.is_empty() || planner.roll_percent(65); + if choose_insert { + let row = planner.make_row(0); + let mirror = row.clone(); + planner.insert(conn, 0, row.clone()); + planner.insert(conn, 1, mirror.clone()); + planner.push_interaction(TableWorkloadInteraction::Insert { + conn, + table: 0, + row: row.clone(), + }); + planner.push_interaction(property_interaction(TableProperty::VisibleInConnection { + conn, + table: 0, + row: row.clone(), + })); + planner.push_interaction(TableWorkloadInteraction::Insert { + conn, + table: 1, + row: mirror.clone(), + }); + planner.push_interaction(property_interaction(TableProperty::VisibleInConnection { + conn, + table: 1, + row: mirror.clone(), + })); + if !planner.in_tx(conn) { + planner.push_interaction(property_interaction(TableProperty::VisibleFresh { + table: 0, + row: row.clone(), + })); + planner.push_interaction(property_interaction(TableProperty::VisibleFresh { + table: 1, + row: mirror, + })); + planner.push_interaction(property_interaction(TableProperty::TablesMatchFresh { + left: 0, + right: 1, + })); + } + return; + } + + let row = debit_rows[planner.choose_index(debit_rows.len())].clone(); + let mirror = row.clone(); + planner.delete(conn, 0, row.clone()); + planner.delete(conn, 1, mirror.clone()); + planner.push_interaction(TableWorkloadInteraction::Delete { + conn, + table: 0, + row: row.clone(), + }); + planner.push_interaction(property_interaction(TableProperty::MissingInConnection { + conn, + table: 0, + row: row.clone(), + })); + planner.push_interaction(TableWorkloadInteraction::Delete { + conn, + table: 1, + 
row: mirror.clone(), + }); + planner.push_interaction(property_interaction(TableProperty::MissingInConnection { + conn, + table: 1, + row: mirror.clone(), + })); + if !planner.in_tx(conn) { + planner.push_interaction(property_interaction(TableProperty::MissingFresh { + table: 0, + row: row.clone(), + })); + planner.push_interaction(property_interaction(TableProperty::MissingFresh { + table: 1, + row: mirror, + })); + planner.push_interaction(property_interaction(TableProperty::TablesMatchFresh { + left: 0, + right: 1, + })); + } +} diff --git a/crates/dst/src/workload/table_ops/scenarios/mod.rs b/crates/dst/src/workload/table_ops/scenarios/mod.rs new file mode 100644 index 00000000000..f7e3ea1698e --- /dev/null +++ b/crates/dst/src/workload/table_ops/scenarios/mod.rs @@ -0,0 +1,110 @@ +mod banking; +mod random_crud; + +use serde::{Deserialize, Serialize}; + +use crate::{schema::SchemaPlan, seed::DstRng}; + +use super::{ + generation::ScenarioPlanner, TableProperty, TableScenario, TableWorkloadInteraction, TableWorkloadOutcome, +}; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub struct RandomCrudScenario; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub struct BankingScenario; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] +pub enum TableScenarioId { + #[default] + RandomCrud, + Banking, +} + +impl TableScenario for RandomCrudScenario { + fn name(&self) -> &'static str { + "random-crud" + } + + fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan { + random_crud::generate_schema(rng) + } + + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + random_crud::validate_outcome(schema, outcome) + } + + fn commit_properties(&self) -> Vec { + Vec::new() + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { + random_crud::fill_pending(planner, conn); + } +} + +impl TableScenario for BankingScenario { + fn name(&self) -> &'static 
str { + "banking" + } + + fn generate_schema(&self, _rng: &mut DstRng) -> SchemaPlan { + banking::generate_schema() + } + + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + banking::validate_outcome(schema, outcome) + } + + fn commit_properties(&self) -> Vec { + vec![super::properties::property_interaction( + TableProperty::TablesMatchFresh { left: 0, right: 1 }, + )] + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { + banking::fill_pending(planner, conn); + } +} + +impl TableScenario for TableScenarioId { + fn name(&self) -> &'static str { + match self { + Self::RandomCrud => RandomCrudScenario.name(), + Self::Banking => BankingScenario.name(), + } + } + + fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan { + match self { + Self::RandomCrud => RandomCrudScenario.generate_schema(rng), + Self::Banking => BankingScenario.generate_schema(rng), + } + } + + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + match self { + Self::RandomCrud => RandomCrudScenario.validate_outcome(schema, outcome), + Self::Banking => BankingScenario.validate_outcome(schema, outcome), + } + } + + fn commit_properties(&self) -> Vec { + match self { + Self::RandomCrud => RandomCrudScenario.commit_properties(), + Self::Banking => BankingScenario.commit_properties(), + } + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { + match self { + Self::RandomCrud => RandomCrudScenario.fill_pending(planner, conn), + Self::Banking => BankingScenario.fill_pending(planner, conn), + } + } +} + +pub fn default_target_ops(rng: &mut DstRng) -> usize { + 24 + rng.index(24) +} diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs new file mode 100644 index 00000000000..58aca87a72a --- /dev/null +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs 
@@ -0,0 +1,88 @@ +use spacetimedb_sats::AlgebraicType; + +use crate::{ + schema::{generate_supported_type, ColumnPlan, SchemaPlan, TablePlan}, + seed::DstRng, +}; + +use super::super::{ + generation::ScenarioPlanner, + properties::{property_interaction, TableProperty}, + TableWorkloadOutcome, +}; + +pub fn generate_schema(rng: &mut DstRng) -> SchemaPlan { + let table_count = rng.index(3) + 1; + let mut tables = Vec::with_capacity(table_count); + + for table_idx in 0..table_count { + let extra_cols = rng.index(3); + let mut columns = vec![ColumnPlan { + name: "id".into(), + ty: AlgebraicType::U64, + }]; + for col_idx in 0..extra_cols { + columns.push(ColumnPlan { + name: format!("c{table_idx}_{col_idx}"), + ty: generate_supported_type(rng), + }); + } + let secondary_index_col = (columns.len() > 1 && rng.index(100) < 50).then_some(1); + tables.push(TablePlan { + name: format!("dst_table_{table_idx}_{}", rng.next_u64() % 10_000), + columns, + secondary_index_col, + }); + } + + SchemaPlan { tables } +} + +pub fn validate_outcome(_schema: &SchemaPlan, _outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + Ok(()) +} + +pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: usize) { + if planner.maybe_control_tx(conn, 20, 15, 10) { + return; + } + + let table = planner.choose_table(); + let visible_rows = planner.visible_rows(conn, table); + let choose_insert = visible_rows.is_empty() || planner.roll_percent(65); + if choose_insert { + let row = planner.make_row(table); + planner.insert(conn, table, row.clone()); + planner.push_interaction(super::super::TableWorkloadInteraction::Insert { + conn, + table, + row: row.clone(), + }); + planner.push_interaction(property_interaction(TableProperty::VisibleInConnection { + conn, + table, + row, + })); + if !planner.in_tx(conn) { + let row = planner.last_inserted_row(conn).expect("tracked auto-commit insert"); + planner.push_interaction(property_interaction(TableProperty::VisibleFresh { table, row })); + } + return; + } 
+ + let row = visible_rows[planner.choose_index(visible_rows.len())].clone(); + planner.delete(conn, table, row.clone()); + planner.push_interaction(super::super::TableWorkloadInteraction::Delete { + conn, + table, + row: row.clone(), + }); + planner.push_interaction(property_interaction(TableProperty::MissingInConnection { + conn, + table, + row: row.clone(), + })); + if !planner.in_tx(conn) { + planner.push_interaction(property_interaction(TableProperty::MissingFresh { table, row })); + } +} diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs new file mode 100644 index 00000000000..20f731fc8cc --- /dev/null +++ b/crates/dst/src/workload/table_ops/types.rs @@ -0,0 +1,114 @@ +use serde::{Deserialize, Serialize}; + +use crate::{ + schema::{SchemaPlan, SimRow}, + seed::DstRng, +}; + +use super::{generation::ScenarioPlanner, properties::TableProperty, scenarios::TableScenarioId}; + +/// Scenario hook for shared table-oriented workloads. +/// +/// A scenario supplies the initial schema, scenario-specific commit-time +/// properties, and any final invariant over the collected outcome. +pub trait TableScenario: Clone { + fn name(&self) -> &'static str; + fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan; + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()>; + fn commit_properties(&self) -> Vec; + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize); +} + +/// Materialized shared table-workload case reused by multiple targets. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct TableWorkloadCase { + /// Seed used to derive schema and workload decisions. + pub seed: crate::seed::DstSeed, + /// Shared workload scenario identifier. + pub scenario: TableScenarioId, + /// Number of simulated client connections in the run. + pub num_connections: usize, + /// Initial schema installed into target before replaying interactions. 
+ pub schema: SchemaPlan, + /// Materialized interaction trace for replay and shrinking. + pub interactions: Vec, +} + +/// One generated workload step. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum TableWorkloadInteraction { + BeginTx { conn: usize }, + CommitTx { conn: usize }, + RollbackTx { conn: usize }, + Insert { conn: usize, table: usize, row: SimRow }, + Delete { conn: usize, table: usize, row: SimRow }, + Check(TableProperty), +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum TableWorkloadEvent { + /// One interaction executed successfully. + Executed(TableWorkloadInteraction), +} + +/// Final state gathered from a table-workload engine after execution ends. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct TableWorkloadOutcome { + /// Row count for each table in schema order. + pub final_row_counts: Vec, + /// Full committed rows for each table in schema order. + pub final_rows: Vec>, +} + +/// First failing interaction observed while executing a generated workload. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct TableWorkloadExecutionFailure { + /// Zero-based position of the failing interaction. + pub step_index: usize, + /// Target-provided error message. + pub reason: String, + /// Interaction that triggered the failure. + pub interaction: TableWorkloadInteraction, +} + +/// Minimal engine interface implemented by concrete table-oriented targets. +pub trait TableWorkloadEngine { + fn execute(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), String>; + fn collect_outcome(&mut self) -> anyhow::Result; + fn finish(&mut self); +} + +/// Per-connection write transaction bookkeeping shared by locking targets. +pub struct ConnectionWriteState { + /// Open mutable transaction handle for each simulated connection. + pub tx_by_connection: Vec>, + /// Connection that currently owns the single-writer lock, if any. 
+ pub active_writer: Option, +} + +impl ConnectionWriteState { + pub fn new(connection_count: usize) -> Self { + Self { + tx_by_connection: (0..connection_count).map(|_| None).collect(), + active_writer: None, + } + } + + pub fn ensure_known_connection(&self, conn: usize) -> Result<(), String> { + self.tx_by_connection + .get(conn) + .map(|_| ()) + .ok_or_else(|| format!("connection {conn} out of range")) + } + + pub fn ensure_writer_owner(&self, conn: usize, action: &str) -> Result<(), String> { + self.ensure_known_connection(conn)?; + match self.active_writer { + Some(owner) if owner == conn => Ok(()), + Some(owner) => Err(format!( + "connection {conn} cannot {action} while connection {owner} owns lock" + )), + None => Err(format!("connection {conn} has no transaction to {action}")), + } + } +} From 63753dfe4135a040cc62d7f806a5207dc41a55e0 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 22 Apr 2026 19:52:38 +0530 Subject: [PATCH 08/74] remove deadcode --- Cargo.lock | 6 +- Cargo.toml | 2 +- crates/dst/Cargo.toml | 8 +- crates/dst/README.md | 50 +- crates/dst/src/bugbase.rs | 15 +- crates/dst/src/lib.rs | 56 +- crates/dst/src/main.rs | 14 +- crates/dst/src/runner.rs | 60 --- crates/dst/src/schema.rs | 22 +- crates/dst/src/seed.rs | 31 +- crates/dst/src/shrink.rs | 2 +- crates/dst/src/sim/mod.rs | 4 - crates/dst/src/sim/scheduler.rs | 141 ----- crates/dst/src/sim/sync.rs | 109 ---- crates/dst/src/subsystem.rs | 54 -- crates/dst/src/targets/datastore.rs | 493 ++++-------------- crates/dst/src/targets/harness.rs | 53 +- crates/dst/src/targets/mod.rs | 2 +- crates/dst/src/targets/relational_db.rs | 221 ++++---- crates/dst/src/trace.rs | 85 --- .../dst/src/workload/table_ops/generation.rs | 10 +- crates/dst/src/workload/table_ops/mod.rs | 15 +- crates/dst/src/workload/table_ops/model.rs | 4 + .../dst/src/workload/table_ops/properties.rs | 70 ++- .../workload/table_ops/scenarios/banking.rs | 4 +- .../src/workload/table_ops/scenarios/mod.rs | 47 +- 
.../table_ops/scenarios/random_crud.rs | 254 ++++++++- crates/dst/src/workload/table_ops/types.rs | 21 +- 28 files changed, 666 insertions(+), 1187 deletions(-) delete mode 100644 crates/dst/src/runner.rs delete mode 100644 crates/dst/src/sim/mod.rs delete mode 100644 crates/dst/src/sim/scheduler.rs delete mode 100644 crates/dst/src/sim/sync.rs delete mode 100644 crates/dst/src/subsystem.rs delete mode 100644 crates/dst/src/trace.rs diff --git a/Cargo.lock b/Cargo.lock index e5a9c0f843a..cbf00b21508 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8125,18 +8125,18 @@ name = "spacetimedb-dst" version = "2.1.0" dependencies = [ "anyhow", - "pretty_assertions", - "proptest", + "clap 4.5.50", "serde", "serde_json", + "spacetimedb-core", "spacetimedb-datastore", + "spacetimedb-durability", "spacetimedb-execution", "spacetimedb-lib 2.1.0", "spacetimedb-primitives 2.1.0", "spacetimedb-sats 2.1.0", "spacetimedb-schema", "spacetimedb-table", - "tempfile", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index f221c60e113..4db66e44ac0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -387,7 +387,7 @@ features = [ ] [workspace.lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)', 'cfg(madsim)'] } [workspace.lints.clippy] # FIXME: we should work on this lint incrementally diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index 6f6592bfb76..acad3047ade 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -6,6 +6,9 @@ license-file = "LICENSE" description = "Deterministic simulation testing utilities for SpacetimeDB crates" rust-version.workspace = true +[lints] +workspace = true + [[bin]] name = "spacetimedb-dst" path = "src/main.rs" @@ -25,8 +28,3 @@ spacetimedb-primitives.workspace = true spacetimedb-sats.workspace = true spacetimedb-schema = { workspace = true, features = ["test"] } spacetimedb-table.workspace = true - -[dev-dependencies] 
-pretty_assertions.workspace = true -proptest.workspace = true -tempfile.workspace = true diff --git a/crates/dst/README.md b/crates/dst/README.md index 390ce478669..16092901f61 100644 --- a/crates/dst/README.md +++ b/crates/dst/README.md @@ -7,53 +7,44 @@ Deterministic simulation testing utilities for SpacetimeDB. This crate contains reusable pieces for building deterministic simulations, shared workload generators, and concrete DST targets. -- root harness: - `seed.rs`, `trace.rs`, `subsystem.rs`, `runner.rs` -- root generic helpers: +- root helpers: + `seed.rs`, `config.rs` +- root internal helpers: `bugbase.rs`, `shrink.rs` -- root shared target helpers: - `config.rs`, `schema.rs` +- root shared target internals: + `schema.rs` - `workload/`: shared table-style workload split into scenarios, generation, model, and properties -- `sim/`: - reusable simulator primitives like `scheduler.rs` and `sync.rs` - `targets/`: `datastore.rs`, `relational_db.rs` - binary: - `src/bin/dst.rs` + `src/main.rs` ## Reading Order If you are new to the crate, this order keeps the mental model small: -1. `subsystem.rs` -2. `runner.rs` +1. `src/main.rs` +2. `config.rs` 3. `seed.rs` -4. `trace.rs` -5. `sim/scheduler.rs` -6. `config.rs` -7. `schema.rs` -8. `workload/table_ops/` -9. `bugbase.rs` -10. `shrink.rs` -11. `targets/datastore.rs` -12. `targets/relational_db.rs` +4. `workload/table_ops/` +5. `targets/datastore.rs` +6. `targets/relational_db.rs` ## Core Model Most code in the crate revolves around the same shape: - `Case`: generated input for one deterministic run. -- `Trace`: ordered execution record. - `Outcome`: final observable result. -- Invariants: assertions over the run record. +- Properties/checks: assertions performed during execution or against the final outcome. 
That separation is intentional: - generation decides what to try, - execution decides what happened, -- invariants decide whether the run is acceptable, +- properties decide whether the run is acceptable, - shrinking tries to keep the failure while deleting unnecessary steps. ## Shared Table Workload Map @@ -82,8 +73,7 @@ reuse that workload and swap in target-specific engines. For a failing target case: 1. `run_case_detailed` returns `DatastoreExecutionFailure` -2. root `bugbase.rs` can serialize failure plus original case -3. root `shrink.rs` truncates after failure and tries removing interactions +2. internal `shrink.rs` truncates after failure and tries removing interactions while preserving the same failure reason ## CLI @@ -94,16 +84,18 @@ Core commands: ```bash cargo run -p spacetimedb-dst -- run --target datastore --scenario banking --duration 5m +cargo run -p spacetimedb-dst -- run --target datastore --scenario indexed-ranges --duration 5m cargo run -p spacetimedb-dst -- run --target relational-db --seed 42 --max-interactions 2000 cargo run -p spacetimedb-dst -- replay --target datastore bug.json cargo run -p spacetimedb-dst -- shrink --target datastore bug.json ``` -Library unit tests remain for deterministic helpers, shrinking, and small -target correctness checks. Scenario soak runs should go through CLI. +DST workloads are run from CLI only. Use `random-crud` for broad coverage and +`indexed-ranges` when you want to bias toward secondary/composite index range +behavior without hardcoding a single historical bug. ## Current Scope -This crate provides deterministic replay primitives, shared table workload -generation, two concrete targets (`datastore` and `relational_db`), and a -small CLI for seeded or duration-bounded runs. +This crate provides shared table workload generation, two concrete targets +(`datastore` and `relational_db`), and a small CLI for seeded or +duration-bounded runs. 
diff --git a/crates/dst/src/bugbase.rs b/crates/dst/src/bugbase.rs index f045c9fde13..1fa4ac23a54 100644 --- a/crates/dst/src/bugbase.rs +++ b/crates/dst/src/bugbase.rs @@ -2,26 +2,17 @@ use std::{fs, path::Path}; -use serde::{de::DeserializeOwned, Deserialize, Serialize}; - -/// Generic persisted failure artifact for one deterministic run. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct BugArtifact { - pub seed: u64, - pub failure: F, - pub case: C, - pub shrunk_case: Option, -} +use serde::{de::DeserializeOwned, Serialize}; /// Writes any serializable value to disk as pretty JSON. -pub fn save_json(path: impl AsRef, value: &T) -> anyhow::Result<()> { +pub(crate) fn save_json(path: impl AsRef, value: &T) -> anyhow::Result<()> { let body = serde_json::to_string_pretty(value)?; fs::write(path, body)?; Ok(()) } /// Loads any JSON value written by [`save_json`]. -pub fn load_json(path: impl AsRef) -> anyhow::Result { +pub(crate) fn load_json(path: impl AsRef) -> anyhow::Result { let body = fs::read_to_string(path)?; Ok(serde_json::from_str(&body)?) } diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index dde53f7efb0..be2e06062c1 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -1,59 +1,23 @@ //! Deterministic simulation testing utilities for SpacetimeDB crates. //! -//! Layout: +//! Public surface is intentionally narrow and centered on the CLI: //! -//! - Root: harness pieces such as [`seed`], [`trace`], [`subsystem`], and -//! [`runner`]. -//! - Root shared target helpers: [`config`] and [`schema`]. -//! - Root generic helpers: [`bugbase`] and [`shrink`]. -//! - [`sim`]: reusable simulator primitives such as [`scheduler`] and [`sync`]. -//! - [`workload`]: shared workload/model/property generation reused by targets. -//! - [`targets`]: concrete simulation targets such as [`datastore_sim`] and -//! `relational_db`. +//! - [`config`] for run budgets, +//! - [`seed`] for deterministic seeds, +//! 
- [`workload`] for scenario identifiers, +//! - [`targets`] for the executable datastore / relational-db adapters. //! -//! Reading guide: -//! -//! - Start with [`subsystem`] to understand the common `Case -> Trace -> -//! Outcome` shape used across simulations. -//! - Then read [`runner`] for the small orchestration helpers that generate, -//! run, and replay a case. -//! - Read [`sim`] for reusable simulation building blocks. -//! - Read [`workload`] for shared table-workload planning split into -//! scenarios, generation, model, and properties. -//! - Then read the concrete targets in [`targets`]. -//! - [`config`] and [`schema`] hold reusable target-side data shapes. -//! - [`bugbase`] and [`shrink`] are the debugging path after a failure. -//! -//! The crate is primarily a library crate, but long-running DST workloads are -//! intended to be driven through the `dst` binary via `run`, `replay`, and -//! `shrink` commands. +//! The `spacetimedb-dst` binary drives those pieces through `run`, `replay`, +//! and `shrink`. -/// Generic persisted failure artifacts and JSON helpers. -pub mod bugbase; +mod bugbase; /// Shared run-budget configuration for DST targets. pub mod config; -/// Small helpers for generating, running, rerunning, and replay-checking cases. -pub mod runner; -/// Shared schema and row model used by DST targets. -pub mod schema; +mod schema; /// Stable seed and RNG utilities used to make runs reproducible. pub mod seed; -/// Generic shrinking helpers. -pub mod shrink; -/// Reusable simulation primitives. -pub mod sim; -/// Common traits and result types shared by DST subsystems. -pub mod subsystem; +mod shrink; /// Concrete simulator targets. pub mod targets; -/// Trace data structures used to record deterministic execution. -pub mod trace; /// Shared workload generators reused by multiple targets. pub mod workload; - -/// Generic actor scheduler used by deterministic simulations. 
-pub use sim::scheduler; -/// Small in-memory synchronization model used by scheduler-oriented tests. -pub use sim::sync; -/// Higher-level randomized datastore simulator with schema and interaction plans. -pub use targets::datastore as datastore_sim; diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index e6ef2abb392..5cfb5696128 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -73,6 +73,7 @@ enum TargetKind { #[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] enum ScenarioKind { RandomCrud, + IndexedRanges, Banking, } @@ -80,6 +81,7 @@ impl From for TableScenarioId { fn from(value: ScenarioKind) -> Self { match value { ScenarioKind::RandomCrud => TableScenarioId::RandomCrud, + ScenarioKind::IndexedRanges => TableScenarioId::IndexedRanges, ScenarioKind::Banking => TableScenarioId::Banking, } } @@ -210,11 +212,11 @@ fn replay_relational(path: &Path) -> anyhow::Result<()> { fn replay_datastore_case(case: &datastore::DatastoreSimulatorCase) -> anyhow::Result<()> { match datastore::run_case_detailed(case) { - Ok(record) => { + Ok(_) => { println!( "ok target=datastore seed={} steps={}", - record.seed.0, - record.case.interactions.len() + case.seed.0, + case.interactions.len() ); Ok(()) } @@ -230,11 +232,11 @@ fn replay_datastore_case(case: &datastore::DatastoreSimulatorCase) -> anyhow::Re fn replay_relational_case(case: &relational_db::RelationalDbSimulatorCase) -> anyhow::Result<()> { match relational_db::run_case_detailed(case) { - Ok(record) => { + Ok(_) => { println!( "ok target=relational_db seed={} steps={}", - record.seed.0, - record.case.interactions.len() + case.seed.0, + case.interactions.len() ); Ok(()) } diff --git a/crates/dst/src/runner.rs b/crates/dst/src/runner.rs deleted file mode 100644 index 35b962f3504..00000000000 --- a/crates/dst/src/runner.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! Minimal orchestration helpers for deterministic subsystems. -//! -//! These helpers intentionally stay thin: -//! -//! 
- generate a case from a seed, -//! - run it, -//! - rerun the exact same case, -//! - compare trace and outcome for replayability. - -use crate::{ - scheduler::{Actor, ScheduleMode, Scheduler}, - seed::DstSeed, - subsystem::{DstSubsystem, RunRecord}, - trace::Trace, -}; - -/// Runs generic actors under the seeded scheduler and returns the trace. -pub fn run_seeded(actors: Vec, seed: DstSeed) -> Trace { - Scheduler::new(actors, ScheduleMode::Seeded, Some(seed.rng())).run_to_completion() -} - -/// Generates a case from `seed` and executes it once. -pub fn run_generated(seed: DstSeed) -> anyhow::Result> { - let case = S::generate_case(seed); - S::run_case(&case) -} - -/// Re-executes the exact case stored in a previous run record. -pub fn rerun_case( - record: &RunRecord, -) -> anyhow::Result> { - S::run_case(&record.case) -} - -/// Re-executes a run and checks that both trace and outcome match. -pub fn verify_repeatable_execution( - record: &RunRecord, -) -> anyhow::Result> { - let replayed = S::run_case(&record.case)?; - - if replayed.trace != record.trace { - anyhow::bail!( - "repeatability trace mismatch for subsystem `{}`:\nexpected: {:?}\nactual: {:?}", - record.subsystem, - record.trace.as_ref().map(|trace| trace.as_slice()), - replayed.trace.as_ref().map(|trace| trace.as_slice()) - ); - } - - if replayed.outcome != record.outcome { - anyhow::bail!( - "outcome replay mismatch for subsystem `{}`:\nexpected: {:?}\nactual: {:?}", - record.subsystem, - record.outcome, - replayed.outcome - ); - } - - Ok(replayed) -} diff --git a/crates/dst/src/schema.rs b/crates/dst/src/schema.rs index 11f189bc1d7..ab12f834bf5 100644 --- a/crates/dst/src/schema.rs +++ b/crates/dst/src/schema.rs @@ -19,8 +19,11 @@ pub struct TablePlan { pub name: String, /// Ordered column definitions. Column 0 is treated as the primary id column. pub columns: Vec, - /// Optional secondary indexed column used to exercise index installation paths. 
- pub secondary_index_col: Option, + /// Additional indexed column sets beyond the implicit primary id index. + /// + /// A value like `[1]` means a single-column secondary index on column 1. + /// A value like `[0, 1]` means a composite btree index over columns 0 and 1. + pub extra_indexes: Vec>, } /// Column definition used by simulators. @@ -122,6 +125,21 @@ impl SimRow { } } + pub fn project_key(&self, cols: &[u16]) -> Self { + let values = cols + .iter() + .map(|&col| self.values[col as usize].clone()) + .collect::>(); + SimRow { values } + } + + pub fn to_algebraic_value(&self) -> AlgebraicValue { + match self.values.as_slice() { + [value] => value.clone(), + _ => ProductValue::from_iter(self.values.iter().cloned()).into(), + } + } + pub fn id(&self) -> Option { match self.values.first() { Some(AlgebraicValue::U64(value)) => Some(*value), diff --git a/crates/dst/src/seed.rs b/crates/dst/src/seed.rs index b30ad77f102..75ac1e0c32d 100644 --- a/crates/dst/src/seed.rs +++ b/crates/dst/src/seed.rs @@ -11,12 +11,12 @@ use serde::{Deserialize, Serialize}; pub struct DstSeed(pub u64); impl DstSeed { - pub fn fork(self, discriminator: u64) -> Self { + pub(crate) fn fork(self, discriminator: u64) -> Self { // derive independent seed using same mixing primitive Self(splitmix64(self.0 ^ discriminator.wrapping_mul(GAMMA))) } - pub fn rng(self) -> DstRng { + pub(crate) fn rng(self) -> DstRng { DstRng { state: splitmix64(self.0), } @@ -25,18 +25,18 @@ impl DstSeed { /// Small deterministic RNG for simulator code. 
#[derive(Clone, Debug)] -pub struct DstRng { +pub(crate) struct DstRng { state: u64, } impl DstRng { - pub fn next_u64(&mut self) -> u64 { + pub(crate) fn next_u64(&mut self) -> u64 { // advance state, then reuse splitmix64 mixing self.state = self.state.wrapping_add(GAMMA); splitmix64(self.state) } - pub fn index(&mut self, len: usize) -> usize { + pub(crate) fn index(&mut self, len: usize) -> usize { assert!(len > 0, "len must be non-zero"); (self.next_u64() as usize) % len } @@ -52,24 +52,3 @@ fn splitmix64(mut x: u64) -> u64 { x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); x ^ (x >> 31) } - -#[cfg(test)] -mod tests { - use super::DstSeed; - - #[test] - fn fork_is_stable_and_distinct() { - let seed = DstSeed(7); - assert_eq!(seed.fork(1), seed.fork(1)); - assert_ne!(seed.fork(1), seed.fork(2)); - } - - #[test] - fn rng_sequence_is_replayable() { - let mut a = DstSeed(99).rng(); - let mut b = DstSeed(99).rng(); - for _ in 0..8 { - assert_eq!(a.next_u64(), b.next_u64()); - } - } -} diff --git a/crates/dst/src/shrink.rs b/crates/dst/src/shrink.rs index 302f43cd172..6796c92c8fd 100644 --- a/crates/dst/src/shrink.rs +++ b/crates/dst/src/shrink.rs @@ -1,7 +1,7 @@ //! Generic shrinking helpers for deterministic targets. /// Generic remove-and-replay shrink loop. -pub fn shrink_by_removing( +pub(crate) fn shrink_by_removing( case: &C, target_failure: &F, truncate: impl Fn(&C) -> C, diff --git a/crates/dst/src/sim/mod.rs b/crates/dst/src/sim/mod.rs deleted file mode 100644 index b276cf1baef..00000000000 --- a/crates/dst/src/sim/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -//! Reusable simulator primitives. - -pub mod scheduler; -pub mod sync; diff --git a/crates/dst/src/sim/scheduler.rs b/crates/dst/src/sim/scheduler.rs deleted file mode 100644 index 8f009df2fff..00000000000 --- a/crates/dst/src/sim/scheduler.rs +++ /dev/null @@ -1,141 +0,0 @@ -//! Generic scheduler for actor-style deterministic simulations. -//! -//! The scheduler is deliberately small. 
It repeatedly selects a runnable actor, -//! lets it emit events into the trace, and stops once every actor reports that -//! it is complete. - -use crate::{seed::DstRng, trace::Trace}; - -/// Result of asking an actor to make one step of progress. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum StepState { - Progressed, - Blocked, - Complete, -} - -/// Minimal interface for something the scheduler can drive. -pub trait Actor { - type Event: Clone; - - fn step(&mut self, trace: &mut Trace) -> StepState; - fn is_complete(&self) -> bool; -} - -/// Policy for choosing the next runnable actor. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum ScheduleMode { - RoundRobin, - Seeded, -} - -/// Deterministic actor scheduler with either round-robin or seeded selection. -pub struct Scheduler { - actors: Vec, - cursor: usize, - rng: Option, - trace: Trace, -} - -impl Scheduler { - pub fn new(actors: Vec, mode: ScheduleMode, rng: Option) -> Self { - let rng = match mode { - ScheduleMode::RoundRobin => None, - ScheduleMode::Seeded => Some(rng.expect("seeded mode requires rng")), - }; - Self { - actors, - cursor: 0, - rng, - trace: Trace::default(), - } - } - - pub fn run_to_completion(mut self) -> Trace { - while self.step_once() {} - self.trace - } - - pub fn step_once(&mut self) -> bool { - let runnable = self.runnable_indices(); - if runnable.is_empty() { - return false; - } - - let pick = if let Some(rng) = &mut self.rng { - runnable[rng.index(runnable.len())] - } else { - let pick = runnable[self.cursor % runnable.len()]; - self.cursor = self.cursor.wrapping_add(1); - pick - }; - - !matches!(self.actors[pick].step(&mut self.trace), StepState::Complete) - || self.actors.iter().any(|actor| !actor.is_complete()) - } - - fn runnable_indices(&self) -> Vec { - self.actors - .iter() - .enumerate() - .filter_map(|(idx, actor)| (!actor.is_complete()).then_some(idx)) - .collect() - } -} - -#[cfg(test)] -mod tests { - use crate::trace::Trace; - - use super::{Actor, 
ScheduleMode, Scheduler, StepState}; - - #[derive(Clone)] - struct CounterActor { - label: &'static str, - remaining: usize, - } - - impl Actor for CounterActor { - type Event = &'static str; - - fn step(&mut self, trace: &mut Trace) -> StepState { - if self.remaining == 0 { - return StepState::Complete; - } - trace.push(self.label); - self.remaining -= 1; - if self.remaining == 0 { - StepState::Complete - } else { - StepState::Progressed - } - } - - fn is_complete(&self) -> bool { - self.remaining == 0 - } - } - - #[test] - fn round_robin_scheduler_is_stable() { - let trace = Scheduler::new( - vec![ - CounterActor { - label: "a", - remaining: 2, - }, - CounterActor { - label: "b", - remaining: 2, - }, - ], - ScheduleMode::RoundRobin, - None, - ) - .run_to_completion(); - assert_eq!( - trace.as_slice().iter().map(|event| event.event).collect::>(), - vec!["a", "b", "a", "b"] - ); - } -} diff --git a/crates/dst/src/sim/sync.rs b/crates/dst/src/sim/sync.rs deleted file mode 100644 index f247e787bcb..00000000000 --- a/crates/dst/src/sim/sync.rs +++ /dev/null @@ -1,109 +0,0 @@ -//! Tiny synchronization primitives for deterministic tests. -//! -//! This file models only the behavior needed by crate tests; it is not trying -//! to be a full synchronization library. - -use std::collections::VecDeque; - -/// Lock lifecycle events emitted by [`SimRwLock`]. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum LockEventKind { - ReadRequested, - WriteRequested, - ReadGranted, - WriteGranted, - ReadReleased, - WriteReleased, -} - -/// One simulated lock event tagged with the actor that caused it. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct LockEvent { - pub actor_id: usize, - pub kind: LockEventKind, -} - -/// Minimal FIFO read/write lock model used in deterministic tests. 
-#[derive(Clone, Debug, Default)] -pub struct SimRwLock { - readers: usize, - writer: Option, - waiters: VecDeque<(usize, LockEventKind)>, -} - -impl SimRwLock { - pub fn request_read(&mut self, actor_id: usize) -> LockEvent { - self.waiters.push_back((actor_id, LockEventKind::ReadRequested)); - LockEvent { - actor_id, - kind: LockEventKind::ReadRequested, - } - } - - pub fn request_write(&mut self, actor_id: usize) -> LockEvent { - self.waiters.push_back((actor_id, LockEventKind::WriteRequested)); - LockEvent { - actor_id, - kind: LockEventKind::WriteRequested, - } - } - - pub fn grant_next(&mut self) -> Option { - let &(actor_id, kind) = self.waiters.front()?; - match kind { - LockEventKind::ReadRequested if self.writer.is_none() => { - self.waiters.pop_front(); - self.readers += 1; - Some(LockEvent { - actor_id, - kind: LockEventKind::ReadGranted, - }) - } - LockEventKind::WriteRequested if self.writer.is_none() && self.readers == 0 => { - self.waiters.pop_front(); - self.writer = Some(actor_id); - Some(LockEvent { - actor_id, - kind: LockEventKind::WriteGranted, - }) - } - _ => None, - } - } - - pub fn release_read(&mut self, actor_id: usize) -> LockEvent { - assert!(self.readers > 0, "no reader to release"); - self.readers -= 1; - LockEvent { - actor_id, - kind: LockEventKind::ReadReleased, - } - } - - pub fn release_write(&mut self, actor_id: usize) -> LockEvent { - assert_eq!(self.writer, Some(actor_id), "actor does not own write lock"); - self.writer = None; - LockEvent { - actor_id, - kind: LockEventKind::WriteReleased, - } - } -} - -#[cfg(test)] -mod tests { - use super::{LockEventKind, SimRwLock}; - - #[test] - fn writer_waits_for_reader() { - let mut lock = SimRwLock::default(); - lock.request_read(1); - assert_eq!(lock.grant_next().unwrap().kind, LockEventKind::ReadGranted); - - lock.request_write(2); - assert!(lock.grant_next().is_none()); - - lock.release_read(1); - assert_eq!(lock.grant_next().unwrap().kind, LockEventKind::WriteGranted); - } -} diff 
--git a/crates/dst/src/subsystem.rs b/crates/dst/src/subsystem.rs deleted file mode 100644 index 1978e70a9fd..00000000000 --- a/crates/dst/src/subsystem.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Shared traits for deterministic simulation subsystems. -//! -//! A subsystem defines: -//! -//! - a generated `Case`, -//! - a stream of traced `Event`s, -//! - a final `Outcome`. -//! -//! `RunRecord` packages those pieces together so replay checks and invariants -//! can reason about one run without knowing subsystem-specific details. - -use crate::{seed::DstSeed, trace::Trace}; - -/// A deterministic simulation subsystem. -pub trait DstSubsystem { - type Case: Clone + core::fmt::Debug + Eq + PartialEq; - type Event: Clone + core::fmt::Debug + Eq + PartialEq; - type Outcome: Clone + core::fmt::Debug + Eq + PartialEq; - - fn name() -> &'static str; - fn generate_case(seed: DstSeed) -> Self::Case; - fn run_case(case: &Self::Case) -> anyhow::Result>; -} - -/// Result of one fully executed deterministic run. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct RunRecord { - /// Human-readable subsystem name used in logs and replay diagnostics. - pub subsystem: &'static str, - /// Top-level seed that produced this run. - pub seed: DstSeed, - /// Full generated or loaded input case. - pub case: C, - /// Optional execution trace collected while the case ran. - pub trace: Option>, - /// Final target-specific outcome after execution completes. - pub outcome: O, -} - -/// Post-run assertion over a run record. -pub trait Invariant { - fn name(&self) -> &'static str; - fn check(&self, run: &R) -> anyhow::Result<()>; -} - -/// Runs each invariant and annotates failures with the invariant name. 
-pub fn assert_invariants(run: &R, invariants: &[&dyn Invariant]) -> anyhow::Result<()> { - for invariant in invariants { - invariant - .check(run) - .map_err(|err| anyhow::anyhow!("invariant `{}` failed: {err}", invariant.name()))?; - } - Ok(()) -} diff --git a/crates/dst/src/targets/datastore.rs b/crates/dst/src/targets/datastore.rs index 41bd1cf37db..6b3ea2c703c 100644 --- a/crates/dst/src/targets/datastore.rs +++ b/crates/dst/src/targets/datastore.rs @@ -5,7 +5,7 @@ use std::path::Path; use spacetimedb_datastore::{ execution_context::Workload, locking_tx_datastore::{datastore::Locking, MutTxId}, - traits::{IsolationLevel, MutTx, MutTxDatastore, Tx}, + traits::{IsolationLevel, MutTx, MutTxDatastore, Tx, TxDatastore}, }; use spacetimedb_execution::Datastore as _; use spacetimedb_lib::{ @@ -22,40 +22,26 @@ use spacetimedb_schema::{ use spacetimedb_table::page_pool::PagePool; use crate::{ - bugbase::{load_json, save_json, BugArtifact}, config::RunConfig, schema::{SchemaPlan, SimRow}, seed::DstSeed, - subsystem::{DstSubsystem, RunRecord}, targets::harness::{self, TableTargetHarness}, workload::table_ops::{ - ConnectionWriteState, TableProperty, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, - TableWorkloadEvent, TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, + ConnectionWriteState, PropertyBound, TableProperty, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, + TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, }, }; -pub type DatastoreScenario = TableScenarioId; pub type DatastoreSimulatorCase = TableWorkloadCase; -pub type Interaction = TableWorkloadInteraction; -pub type DatastoreSimulatorEvent = TableWorkloadEvent; pub type DatastoreSimulatorOutcome = TableWorkloadOutcome; pub type DatastoreExecutionFailure = TableWorkloadExecutionFailure; -pub type DatastoreBugArtifact = BugArtifact; -pub type DatastoreRunConfig = RunConfig; -pub use crate::config::parse_duration_spec; - -/// DST 
subsystem wrapper around the randomized datastore simulator. -pub struct DatastoreSimulatorSubsystem; +type Interaction = TableWorkloadInteraction; struct DatastoreTarget; impl TableTargetHarness for DatastoreTarget { type Engine = DatastoreEngine; - fn target_name() -> &'static str { - DatastoreSimulatorSubsystem::name() - } - fn connection_seed_discriminator() -> u64 { 17 } @@ -65,66 +51,20 @@ impl TableTargetHarness for DatastoreTarget { } } -impl DstSubsystem for DatastoreSimulatorSubsystem { - type Case = DatastoreSimulatorCase; - type Event = DatastoreSimulatorEvent; - type Outcome = DatastoreSimulatorOutcome; - - fn name() -> &'static str { - "datastore-simulator" - } - - fn generate_case(seed: DstSeed) -> Self::Case { - harness::generate_case::(seed, DatastoreScenario::RandomCrud) - } - - fn run_case(case: &Self::Case) -> anyhow::Result> { - harness::run_case_detailed::(case).map_err(|failure| { - anyhow::anyhow!( - "datastore simulator failed at step {}: {}", - failure.step_index, - failure.reason - ) - }) - } -} - -pub fn generate_case(seed: DstSeed) -> DatastoreSimulatorCase { - generate_case_for_scenario(seed, DatastoreScenario::RandomCrud) -} - -pub fn generate_case_for_scenario(seed: DstSeed, scenario: DatastoreScenario) -> DatastoreSimulatorCase { - harness::generate_case::(seed, scenario) -} - -pub fn materialize_case(seed: DstSeed, scenario: DatastoreScenario, max_interactions: usize) -> DatastoreSimulatorCase { +pub fn materialize_case(seed: DstSeed, scenario: TableScenarioId, max_interactions: usize) -> DatastoreSimulatorCase { harness::materialize_case::(seed, scenario, max_interactions) } pub fn run_case_detailed( case: &DatastoreSimulatorCase, -) -> Result< - RunRecord, - DatastoreExecutionFailure, -> { +) -> Result { harness::run_case_detailed::(case) } -pub fn run_generated_stream(seed: DstSeed, max_interactions: usize) -> anyhow::Result { - run_generated_with_config(seed, DatastoreRunConfig::with_max_interactions(max_interactions)) -} - 
-pub fn run_generated_with_config( - seed: DstSeed, - config: DatastoreRunConfig, -) -> anyhow::Result { - run_generated_with_config_and_scenario(seed, DatastoreScenario::RandomCrud, config) -} - pub fn run_generated_with_config_and_scenario( seed: DstSeed, - scenario: DatastoreScenario, - config: DatastoreRunConfig, + scenario: TableScenarioId, + config: RunConfig, ) -> anyhow::Result { harness::run_generated_with_config_and_scenario::(seed, scenario, config) } @@ -137,18 +77,6 @@ pub fn load_case(path: impl AsRef) -> anyhow::Result anyhow::Result { - harness::failure_reason::(case) -} - -pub fn save_bug_artifact(path: impl AsRef, artifact: &DatastoreBugArtifact) -> anyhow::Result<()> { - save_json(path, artifact) -} - -pub fn load_bug_artifact(path: impl AsRef) -> anyhow::Result { - load_json(path) -} - pub fn shrink_failure( case: &DatastoreSimulatorCase, failure: &DatastoreExecutionFailure, @@ -230,6 +158,44 @@ impl DatastoreEngine { rows.sort_by_key(|row| row.id().unwrap_or_default()); Ok(rows) } + + fn fresh_range_scan( + &self, + table_id: TableId, + cols: &[u16], + lower: &PropertyBound, + upper: &PropertyBound, + ) -> anyhow::Result> { + let tx = self.datastore.begin_tx(Workload::ForTests); + let cols = cols.iter().copied().collect::(); + let lower = lower.to_range_bound(); + let upper = upper.to_range_bound(); + let rows = self + .datastore + .iter_by_col_range_tx(&tx, table_id, cols, (lower, upper))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect(); + Ok(rows) + } + + fn in_tx_range_scan( + &self, + tx: &MutTxId, + table_id: TableId, + cols: &[u16], + lower: &PropertyBound, + upper: &PropertyBound, + ) -> anyhow::Result> { + let cols = cols.iter().copied().collect::(); + let lower = lower.to_range_bound(); + let upper = upper.to_range_bound(); + let rows = self + .datastore + .iter_by_col_range_mut_tx(tx, table_id, cols, (lower, upper))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect(); + Ok(rows) + } } impl TableWorkloadEngine for DatastoreEngine { @@ -271,7 +237,7 @@ impl TableWorkloadEngine for DatastoreEngine { } Interaction::Insert { conn, table, row } => { self.with_mut_tx(*conn, *table, |datastore, table_id, tx| { - let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + let bsatn = row.to_bsatn().map_err(|err: anyhow::Error| err.to_string())?; datastore .insert_mut_tx(tx, table_id, &bsatn) .map_err(|err| format!("insert failed: {err}"))?; @@ -366,6 +332,57 @@ impl TableWorkloadEngine for DatastoreEngine { return Err(format!("row count mismatch: expected={expected} actual={actual}")); } } + Interaction::Check(TableProperty::RangeScanInConnection { + conn, + table, + cols, + lower, + upper, + expected_rows, + }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let mut actual_rows = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { + self.in_tx_range_scan(tx, table_id, cols, lower, upper) + .map_err(|err| format!("in-tx range scan failed: {err}"))? + } else { + self.fresh_range_scan(table_id, cols, lower, upper) + .map_err(|err| format!("fresh range scan failed: {err}"))? 
+ }; + actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + let mut expected_rows = expected_rows.clone(); + expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + if actual_rows != expected_rows { + return Err(format!( + "connection range scan mismatch on table {table}, cols={cols:?}: expected={expected_rows:?} actual={actual_rows:?}" + )); + } + } + Interaction::Check(TableProperty::RangeScanFresh { + table, + cols, + lower, + upper, + expected_rows, + }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let mut actual_rows = self + .fresh_range_scan(table_id, cols, lower, upper) + .map_err(|err| format!("fresh range scan failed: {err}"))?; + actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + let mut expected_rows = expected_rows.clone(); + expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + if actual_rows != expected_rows { + return Err(format!( + "fresh range scan mismatch on table {table}, cols={cols:?}: expected={expected_rows:?} actual={actual_rows:?}" + )); + } + } Interaction::Check(TableProperty::TablesMatchFresh { left, right }) => { let left_rows = self .collect_rows_for_table(*left) @@ -435,10 +452,11 @@ fn install_schema(datastore: &Locking, schema: &SchemaPlan) -> anyhow::Result>().join("_"); indexes.push(IndexSchema::for_test( - format!("{}_c{col}_idx", table.name), - BTreeAlgorithm::from(col), + format!("{}_{}_idx", table.name, cols_name), + BTreeAlgorithm::from(cols.iter().copied().collect::()), )); } let constraints = vec![ConstraintSchema::unique_for_test( @@ -471,306 +489,9 @@ fn install_schema(datastore: &Locking, schema: &SchemaPlan) -> anyhow::Result &'static Mutex<()> { - static LOCK: OnceLock> = OnceLock::new(); - LOCK.get_or_init(|| Mutex::new(())) - } - - #[test] - fn generated_case_replays_identically() { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - let artifact 
= run_generated::(DstSeed(13)).expect("run datastore simulator case"); - let replayed = rerun_case::(&artifact).expect("rerun datastore simulator case"); - assert_eq!(artifact.case, replayed.case); - assert_eq!(artifact.trace, replayed.trace); - assert_eq!(artifact.outcome, replayed.outcome); - } - - #[test] - fn generated_case_has_repeatable_execution() { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - let artifact = run_generated::(DstSeed(23)).expect("run datastore simulator case"); - let replayed = - verify_repeatable_execution::(&artifact).expect("verify repeatable execution"); - assert_eq!(artifact.trace, replayed.trace); - assert_eq!(artifact.outcome, replayed.outcome); - } - - #[test] - fn failure_reports_stable_reason() { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - let case = failing_case(); - let failure = run_case_detailed(&case).expect_err("case should fail"); - assert_eq!(failure.step_index, 2); - assert!(failure.reason.contains("fresh lookup still found deleted row")); - assert_eq!(failure_reason(&case).expect("extract failure reason"), failure.reason); - } - - proptest! 
{ - #[test] - fn datastore_simulator_holds_across_generated_seeds(seed in any::()) { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - run_generated::(DstSeed(seed)) - .unwrap_or_else(|err| panic!("seed {seed} failed: {err}")); - } - } - - #[test] - fn duration_specs_parse() { - assert_eq!(parse_duration_spec("5m").expect("parse 5m"), Duration::from_secs(300)); - assert_eq!(parse_duration_spec("2s").expect("parse 2s"), Duration::from_secs(2)); - assert_eq!( - parse_duration_spec("10ms").expect("parse 10ms"), - Duration::from_millis(10) - ); - } - - #[test] - fn banking_generation_uses_fixed_schema() { - let case = generate_case_for_scenario(DstSeed(9090), DatastoreScenario::Banking); - assert_eq!(case.scenario, DatastoreScenario::Banking); - assert_eq!(case.schema.tables.len(), 2); - assert_eq!(case.schema.tables[0].name, "debit_accounts"); - assert_eq!(case.schema.tables[1].name, "credit_accounts"); - } - - #[test] - fn generated_cases_keep_single_writer_lock() { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - let case = generate_case(DstSeed(4242)); - let mut owner = None; - - for interaction in case.interactions { - match interaction { - Interaction::BeginTx { conn } => { - assert_eq!(owner, None, "second writer opened before first closed"); - owner = Some(conn); - } - Interaction::CommitTx { conn } | Interaction::RollbackTx { conn } => { - assert_eq!(owner, Some(conn), "non-owner closed writer"); - owner = None; - } - Interaction::Insert { conn, .. } | Interaction::Delete { conn, .. } => { - if let Some(writer) = owner { - assert_eq!(conn, writer, "interaction ran on non-owner while writer open"); - } - } - Interaction::Check(TableProperty::VisibleInConnection { conn, .. }) - | Interaction::Check(TableProperty::MissingInConnection { conn, .. 
}) => { - if let Some(writer) = owner { - assert_eq!(conn, writer, "interaction ran on non-owner while writer open"); - } - } - Interaction::Check(_) => {} - } - } - - assert_eq!(owner, None, "writer left open at end of generated case"); - } - - #[test] - fn second_writer_fails_fast() { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - let case = DatastoreSimulatorCase { - seed: DstSeed(88), - scenario: DatastoreScenario::RandomCrud, - num_connections: 2, - schema: SchemaPlan { - tables: vec![TablePlan { - name: "locks".into(), - columns: vec![ - ColumnPlan { - name: "id".into(), - ty: AlgebraicType::U64, - }, - ColumnPlan { - name: "name".into(), - ty: AlgebraicType::String, - }, - ], - secondary_index_col: Some(1), - }], - }, - interactions: vec![Interaction::BeginTx { conn: 0 }, Interaction::BeginTx { conn: 1 }], - }; - - let failure = run_case_detailed(&case).expect_err("second writer should fail"); - assert_eq!(failure.step_index, 1); - assert!(failure.reason.contains("owns lock")); - } - - #[test] - fn bug_artifact_roundtrips() { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - let dir = tempdir().expect("create tempdir"); - let path = dir.path().join("bug.json"); - let case = DatastoreSimulatorCase { - seed: DstSeed(5), - scenario: DatastoreScenario::RandomCrud, - num_connections: 1, - schema: SchemaPlan { - tables: vec![TablePlan { - name: "bugs".into(), - columns: vec![ - ColumnPlan { - name: "id".into(), - ty: AlgebraicType::U64, - }, - ColumnPlan { - name: "ok".into(), - ty: AlgebraicType::Bool, - }, - ], - secondary_index_col: Some(1), - }], - }, - interactions: vec![Interaction::Check(TableProperty::VisibleFresh { - table: 0, - row: SimRow { - values: vec![AlgebraicValue::U64(7), AlgebraicValue::Bool(true)], - }, - })], - }; - let failure = run_case_detailed(&case).expect_err("case should fail"); - let artifact = DatastoreBugArtifact { - seed: case.seed.0, - failure, - case: case.clone(), - 
shrunk_case: Some(case), - }; - - save_bug_artifact(&path, &artifact).expect("save artifact"); - let loaded = load_bug_artifact(&path).expect("load artifact"); - assert_eq!(loaded, artifact); - } - - #[test] - fn shrink_drops_trailing_noise() { - let _guard = test_lock().lock().unwrap_or_else(|err| err.into_inner()); - let case = DatastoreSimulatorCase { - seed: DstSeed(77), - scenario: DatastoreScenario::RandomCrud, - num_connections: 1, - schema: SchemaPlan { - tables: vec![TablePlan { - name: "bugs".into(), - columns: vec![ - ColumnPlan { - name: "id".into(), - ty: AlgebraicType::U64, - }, - ColumnPlan { - name: "name".into(), - ty: AlgebraicType::String, - }, - ], - secondary_index_col: Some(1), - }], - }, - interactions: vec![ - Interaction::Insert { - conn: 0, - table: 0, - row: SimRow { - values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], - }, - }, - Interaction::Check(TableProperty::VisibleFresh { - table: 0, - row: SimRow { - values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], - }, - }), - Interaction::Check(TableProperty::MissingFresh { - table: 0, - row: SimRow { - values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], - }, - }), - Interaction::Insert { - conn: 0, - table: 0, - row: SimRow { - values: vec![AlgebraicValue::U64(2), AlgebraicValue::String("two".into())], - }, - }, - ], - }; - - let failure = run_case_detailed(&case).expect_err("case should fail"); - let shrunk = shrink_failure(&case, &failure).expect("shrink failure"); - assert!(shrunk.interactions.len() < case.interactions.len()); - let shrunk_failure = run_case_detailed(&shrunk).expect_err("shrunk case should still fail"); - assert_eq!(shrunk_failure.reason, failure.reason); - } - - fn failing_case() -> DatastoreSimulatorCase { - DatastoreSimulatorCase { - seed: DstSeed(99), - scenario: DatastoreScenario::RandomCrud, - num_connections: 1, - schema: SchemaPlan { - tables: vec![TablePlan { - name: "bugs".into(), - 
columns: vec![ - ColumnPlan { - name: "id".into(), - ty: AlgebraicType::U64, - }, - ColumnPlan { - name: "name".into(), - ty: AlgebraicType::String, - }, - ], - secondary_index_col: Some(1), - }], - }, - interactions: vec![ - Interaction::Insert { - conn: 0, - table: 0, - row: SimRow { - values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], - }, - }, - Interaction::Check(TableProperty::VisibleFresh { - table: 0, - row: SimRow { - values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], - }, - }), - Interaction::Check(TableProperty::MissingFresh { - table: 0, - row: SimRow { - values: vec![AlgebraicValue::U64(1), AlgebraicValue::String("one".into())], - }, - }), - ], - } - } +fn compare_rows_by_cols(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) } diff --git a/crates/dst/src/targets/harness.rs b/crates/dst/src/targets/harness.rs index b4624861c4b..39d29b5483a 100644 --- a/crates/dst/src/targets/harness.rs +++ b/crates/dst/src/targets/harness.rs @@ -6,19 +6,15 @@ use crate::{ schema::SchemaPlan, seed::DstSeed, shrink::shrink_by_removing, - subsystem::RunRecord, - trace::Trace, workload::table_ops::{ - default_target_ops, execute_interactions, run_generated_with_engine, InteractionStream, TableScenario, - TableScenarioId, TableWorkloadCase, TableWorkloadEngine, TableWorkloadEvent, TableWorkloadExecutionFailure, - TableWorkloadOutcome, + execute_interactions, run_generated_with_engine, InteractionStream, TableScenario, TableScenarioId, + TableWorkloadCase, TableWorkloadEngine, TableWorkloadExecutionFailure, TableWorkloadOutcome, }, }; -pub trait TableTargetHarness { +pub(crate) trait TableTargetHarness { type Engine: TableWorkloadEngine; - fn target_name() -> &'static str; fn connection_seed_discriminator() -> u64; fn build_engine(schema: &SchemaPlan, num_connections: 
usize) -> anyhow::Result; @@ -31,7 +27,7 @@ pub trait TableTargetHarness { } } -pub fn materialize_case( +pub(crate) fn materialize_case( seed: DstSeed, scenario: TableScenarioId, max_interactions: usize, @@ -50,37 +46,19 @@ pub fn materialize_case( } } -pub fn generate_case(seed: DstSeed, scenario: TableScenarioId) -> TableWorkloadCase { - let mut rng = seed.fork(T::connection_seed_discriminator()).rng(); - materialize_case::(seed, scenario, default_target_ops(&mut rng)) -} - -pub fn run_case_detailed( +pub(crate) fn run_case_detailed( case: &TableWorkloadCase, -) -> Result, TableWorkloadExecutionFailure> { - let mut trace = Trace::default(); - for interaction in &case.interactions { - trace.push(TableWorkloadEvent::Executed(interaction.clone())); - } - - let outcome = execute_interactions( +) -> Result { + execute_interactions( &case.scenario, &case.schema, case.num_connections, case.interactions.clone(), T::build_engine, - )?; - - Ok(RunRecord { - subsystem: T::target_name(), - seed: case.seed, - case: case.clone(), - trace: Some(trace), - outcome, - }) + ) } -pub fn run_generated_with_config_and_scenario( +pub(crate) fn run_generated_with_config_and_scenario( seed: DstSeed, scenario: TableScenarioId, config: RunConfig, @@ -88,22 +66,15 @@ pub fn run_generated_with_config_and_scenario( run_generated_with_engine(seed, scenario, config, T::build_engine) } -pub fn save_case(path: impl AsRef, case: &TableWorkloadCase) -> anyhow::Result<()> { +pub(crate) fn save_case(path: impl AsRef, case: &TableWorkloadCase) -> anyhow::Result<()> { save_json(path, case) } -pub fn load_case(path: impl AsRef) -> anyhow::Result { +pub(crate) fn load_case(path: impl AsRef) -> anyhow::Result { load_json(path) } -pub fn failure_reason(case: &TableWorkloadCase) -> anyhow::Result { - match run_case_detailed::(case) { - Ok(_) => anyhow::bail!("case did not fail"), - Err(failure) => Ok(failure.reason), - } -} - -pub fn shrink_failure( +pub(crate) fn shrink_failure( case: &TableWorkloadCase, 
failure: &TableWorkloadExecutionFailure, ) -> anyhow::Result { diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index df038999af5..449f06517d0 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -1,5 +1,5 @@ //! Concrete simulation targets. pub mod datastore; -pub mod harness; +pub(crate) mod harness; pub mod relational_db; diff --git a/crates/dst/src/targets/relational_db.rs b/crates/dst/src/targets/relational_db.rs index 7fa4bcb8a4d..0b494f6e54b 100644 --- a/crates/dst/src/targets/relational_db.rs +++ b/crates/dst/src/targets/relational_db.rs @@ -25,39 +25,26 @@ use spacetimedb_schema::{ use spacetimedb_table::page_pool::PagePool; use crate::{ - bugbase::{load_json, save_json, BugArtifact}, config::RunConfig, schema::{SchemaPlan, SimRow}, seed::DstSeed, - subsystem::{DstSubsystem, RunRecord}, targets::harness::{self, TableTargetHarness}, workload::table_ops::{ - ConnectionWriteState, TableProperty, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, - TableWorkloadEvent, TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, + ConnectionWriteState, PropertyBound, TableProperty, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, + TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, }, }; -pub type RelationalDbScenario = TableScenarioId; pub type RelationalDbSimulatorCase = TableWorkloadCase; -pub type RelationalDbInteraction = TableWorkloadInteraction; -pub type RelationalDbSimulatorEvent = TableWorkloadEvent; pub type RelationalDbSimulatorOutcome = TableWorkloadOutcome; pub type RelationalDbExecutionFailure = TableWorkloadExecutionFailure; -pub type RelationalDbBugArtifact = BugArtifact; -pub type RelationalDbRunConfig = RunConfig; - -/// DST subsystem wrapper around the relational-db simulator target. 
-pub struct RelationalDbSimulatorSubsystem; +type RelationalDbInteraction = TableWorkloadInteraction; struct RelationalDbTarget; impl TableTargetHarness for RelationalDbTarget { type Engine = RelationalDbEngine; - fn target_name() -> &'static str { - RelationalDbSimulatorSubsystem::name() - } - fn connection_seed_discriminator() -> u64 { 31 } @@ -67,41 +54,9 @@ impl TableTargetHarness for RelationalDbTarget { } } -impl DstSubsystem for RelationalDbSimulatorSubsystem { - type Case = RelationalDbSimulatorCase; - type Event = RelationalDbSimulatorEvent; - type Outcome = RelationalDbSimulatorOutcome; - - fn name() -> &'static str { - "relational-db-simulator" - } - - fn generate_case(seed: DstSeed) -> Self::Case { - harness::generate_case::(seed, RelationalDbScenario::RandomCrud) - } - - fn run_case(case: &Self::Case) -> anyhow::Result> { - harness::run_case_detailed::(case).map_err(|failure| { - anyhow::anyhow!( - "relational db simulator failed at step {}: {}", - failure.step_index, - failure.reason - ) - }) - } -} - -pub fn generate_case(seed: DstSeed) -> RelationalDbSimulatorCase { - generate_case_for_scenario(seed, RelationalDbScenario::RandomCrud) -} - -pub fn generate_case_for_scenario(seed: DstSeed, scenario: RelationalDbScenario) -> RelationalDbSimulatorCase { - harness::generate_case::(seed, scenario) -} - pub fn materialize_case( seed: DstSeed, - scenario: RelationalDbScenario, + scenario: TableScenarioId, max_interactions: usize, ) -> RelationalDbSimulatorCase { harness::materialize_case::(seed, scenario, max_interactions) @@ -109,28 +64,14 @@ pub fn materialize_case( pub fn run_case_detailed( case: &RelationalDbSimulatorCase, -) -> Result< - RunRecord, - RelationalDbExecutionFailure, -> { +) -> Result { harness::run_case_detailed::(case) } -pub fn run_generated_stream(seed: DstSeed, max_interactions: usize) -> anyhow::Result { - run_generated_with_config(seed, RelationalDbRunConfig::with_max_interactions(max_interactions)) -} - -pub fn 
run_generated_with_config( - seed: DstSeed, - config: RelationalDbRunConfig, -) -> anyhow::Result { - run_generated_with_config_and_scenario(seed, RelationalDbScenario::RandomCrud, config) -} - pub fn run_generated_with_config_and_scenario( seed: DstSeed, - scenario: RelationalDbScenario, - config: RelationalDbRunConfig, + scenario: TableScenarioId, + config: RunConfig, ) -> anyhow::Result { harness::run_generated_with_config_and_scenario::(seed, scenario, config) } @@ -143,14 +84,6 @@ pub fn load_case(path: impl AsRef) -> anyhow::Result, artifact: &RelationalDbBugArtifact) -> anyhow::Result<()> { - save_json(path, artifact) -} - -pub fn load_bug_artifact(path: impl AsRef) -> anyhow::Result { - load_json(path) -} - pub fn shrink_failure( case: &RelationalDbSimulatorCase, failure: &RelationalDbExecutionFailure, @@ -235,6 +168,45 @@ impl RelationalDbEngine { rows.sort_by_key(|row| row.id().unwrap_or_default()); Ok(rows) } + + fn fresh_range_scan( + &self, + table_id: TableId, + cols: &[u16], + lower: &PropertyBound, + upper: &PropertyBound, + ) -> anyhow::Result> { + let tx = self.db.begin_tx(Workload::ForTests); + let cols = cols.iter().copied().collect::(); + let lower = lower.to_range_bound(); + let upper = upper.to_range_bound(); + let rows = self + .db + .iter_by_col_range(&tx, table_id, cols, (lower, upper))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect(); + let _ = self.db.release_tx(tx); + Ok(rows) + } + + fn in_tx_range_scan( + &self, + tx: &RelMutTx, + table_id: TableId, + cols: &[u16], + lower: &PropertyBound, + upper: &PropertyBound, + ) -> anyhow::Result> { + let cols = cols.iter().copied().collect::(); + let lower = lower.to_range_bound(); + let upper = upper.to_range_bound(); + let rows = self + .db + .iter_by_col_range_mut(tx, table_id, cols, (lower, upper))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect(); + Ok(rows) + } } impl TableWorkloadEngine for RelationalDbEngine { @@ -274,7 +246,7 @@ impl TableWorkloadEngine for RelationalDbEngine { } RelationalDbInteraction::Insert { conn, table, row } => { self.with_mut_tx(*conn, *table, |db, table_id, tx| { - let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + let bsatn = row.to_bsatn().map_err(|err: anyhow::Error| err.to_string())?; db.insert(tx, table_id, &bsatn) .map_err(|err| format!("insert failed: {err}"))?; Ok(()) @@ -374,6 +346,57 @@ impl TableWorkloadEngine for RelationalDbEngine { return Err(format!("row count mismatch: expected={expected} actual={actual}")); } } + RelationalDbInteraction::Check(TableProperty::RangeScanInConnection { + conn, + table, + cols, + lower, + upper, + expected_rows, + }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let mut actual_rows = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { + self.in_tx_range_scan(tx, table_id, cols, lower, upper) + .map_err(|err| format!("in-tx range scan failed: {err}"))? + } else { + self.fresh_range_scan(table_id, cols, lower, upper) + .map_err(|err| format!("fresh range scan failed: {err}"))? 
+ }; + actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + let mut expected_rows = expected_rows.clone(); + expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + if actual_rows != expected_rows { + return Err(format!( + "connection range scan mismatch on table {table}, cols={cols:?}: expected={expected_rows:?} actual={actual_rows:?}" + )); + } + } + RelationalDbInteraction::Check(TableProperty::RangeScanFresh { + table, + cols, + lower, + upper, + expected_rows, + }) => { + let table_id = *self + .table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let mut actual_rows = self + .fresh_range_scan(table_id, cols, lower, upper) + .map_err(|err| format!("fresh range scan failed: {err}"))?; + actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + let mut expected_rows = expected_rows.clone(); + expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + if actual_rows != expected_rows { + return Err(format!( + "fresh range scan mismatch on table {table}, cols={cols:?}: expected={expected_rows:?} actual={actual_rows:?}" + )); + } + } RelationalDbInteraction::Check(TableProperty::TablesMatchFresh { left, right }) => { let left_rows = self .collect_rows_for_table(*left) @@ -457,10 +480,11 @@ fn install_schema(db: &RelationalDB, schema: &SchemaPlan) -> anyhow::Result>().join("_"); indexes.push(IndexSchema::for_test( - format!("{}_c{col}_idx", table.name), - BTreeAlgorithm::from(col), + format!("{}_{}_idx", table.name, cols_name), + BTreeAlgorithm::from(cols.iter().copied().collect::()), )); } let constraints = vec![ConstraintSchema::unique_for_test( @@ -493,40 +517,9 @@ fn install_schema(db: &RelationalDB, schema: &SchemaPlan) -> anyhow::Result &'static Mutex<()> { - static LOCK: OnceLock> = OnceLock::new(); - LOCK.get_or_init(|| Mutex::new(())) - } - - #[test] - fn generated_case_replays_identically() { - let _guard = test_lock().lock().unwrap_or_else(|err| 
err.into_inner()); - let artifact = run_generated::(DstSeed(13)).expect("run relational db case"); - let replayed = rerun_case::(&artifact).expect("rerun relational db case"); - assert_eq!(artifact.case, replayed.case); - assert_eq!(artifact.trace, replayed.trace); - assert_eq!(artifact.outcome, replayed.outcome); - } - - #[test] - fn banking_generation_uses_fixed_schema() { - let case = generate_case_for_scenario(DstSeed(4242), RelationalDbScenario::Banking); - assert_eq!(case.scenario, RelationalDbScenario::Banking); - assert_eq!(case.schema.tables.len(), 2); - assert_eq!(case.schema.tables[0].name, "debit_accounts"); - assert_eq!(case.schema.tables[1].name, "credit_accounts"); - } +fn compare_rows_by_cols(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) } diff --git a/crates/dst/src/trace.rs b/crates/dst/src/trace.rs deleted file mode 100644 index 8251331ccf3..00000000000 --- a/crates/dst/src/trace.rs +++ /dev/null @@ -1,85 +0,0 @@ -//! Trace representation for deterministic runs. -//! -//! Each event gets a monotonically increasing `step_id`. Additional metadata is -//! optional so simple simulations can use plain event streams while richer -//! schedulers can attach logical time, actor ids, or resource ids. - -/// One event plus optional metadata captured during execution. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct StampedEvent { - /// Monotonic step number within the trace. - pub step_id: u64, - /// Optional logical time supplied by a scheduler-style simulation. - pub logical_time: Option, - /// Optional actor identity for actor-driven simulations. - pub actor_id: Option, - /// Optional resource identity such as a lock, table, or replica id. - pub resource_id: Option, - /// Target-specific event payload. - pub event: E, -} - -/// Ordered event log for one deterministic run. 
-#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Trace { - events: Vec>, - next_step_id: u64, -} - -impl Trace { - pub fn from_events(events: Vec) -> Self { - let mut trace = Self::default(); - for event in events { - trace.push(event); - } - trace - } - - pub fn push(&mut self, event: E) { - self.push_stamped(StampedEvent { - step_id: self.next_step_id, - logical_time: None, - actor_id: None, - resource_id: None, - event, - }); - } - - pub fn push_with_meta( - &mut self, - event: E, - logical_time: Option, - actor_id: Option, - resource_id: Option, - ) { - self.push_stamped(StampedEvent { - step_id: self.next_step_id, - logical_time, - actor_id, - resource_id, - event, - }); - } - - pub fn as_slice(&self) -> &[StampedEvent] { - &self.events - } - - pub fn into_events(self) -> Vec> { - self.events - } - - fn push_stamped(&mut self, stamped: StampedEvent) { - self.next_step_id = stamped.step_id + 1; - self.events.push(stamped); - } -} - -impl Default for Trace { - fn default() -> Self { - Self { - events: Vec::new(), - next_step_id: 0, - } - } -} diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index a4bed7d16fe..f54dfe1eeec 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -1,7 +1,7 @@ use std::collections::VecDeque; use crate::{ - schema::SchemaPlan, + schema::{SchemaPlan, SimRow, TablePlan}, seed::{DstRng, DstSeed}, }; @@ -72,6 +72,10 @@ impl<'a> ScenarioPlanner<'a> { self.model.visible_rows(conn, table) } + pub fn committed_rows(&self, table: usize) -> Vec { + self.model.committed_rows(table) + } + pub fn make_row(&mut self, table: usize) -> crate::schema::SimRow { self.model.make_row(self.rng, table) } @@ -92,6 +96,10 @@ impl<'a> ScenarioPlanner<'a> { self.model.connections[conn].in_tx } + pub fn table_plan(&self, table: usize) -> &TablePlan { + &self.model.schema.tables[table] + } + pub fn push_interaction(&mut self, interaction: 
TableWorkloadInteraction) { self.pending.push_back(interaction); } diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs index 731fd24ba46..d685cbe388b 100644 --- a/crates/dst/src/workload/table_ops/mod.rs +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -7,11 +7,10 @@ mod runner; mod scenarios; mod types; -pub use generation::{InteractionStream, ScenarioPlanner}; -pub use properties::{followup_properties_after_commit, property_interaction, TableProperty}; -pub use runner::{execute_interactions, run_generated_with_engine}; -pub use scenarios::{default_target_ops, BankingScenario, RandomCrudScenario, TableScenarioId}; -pub use types::{ - ConnectionWriteState, TableScenario, TableWorkloadCase, TableWorkloadEngine, TableWorkloadEvent, - TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, -}; +pub(crate) use generation::InteractionStream; +pub(crate) use properties::{followup_properties_after_commit, property_interaction}; +pub use properties::{PropertyBound, TableProperty}; +pub(crate) use runner::{execute_interactions, run_generated_with_engine}; +pub use scenarios::TableScenarioId; +pub(crate) use types::{ConnectionWriteState, TableScenario, TableWorkloadEngine}; +pub use types::{TableWorkloadCase, TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome}; diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs index 26700cc6663..e83668cbc63 100644 --- a/crates/dst/src/workload/table_ops/model.rs +++ b/crates/dst/src/workload/table_ops/model.rs @@ -78,6 +78,10 @@ impl GenerationModel { rows } + pub(crate) fn committed_rows(&self, table: usize) -> Vec { + self.committed[table].clone() + } + pub(crate) fn active_writer(&self) -> Option { self.active_writer } diff --git a/crates/dst/src/workload/table_ops/properties.rs b/crates/dst/src/workload/table_ops/properties.rs index 43eda15d5f6..5aab032126b 100644 --- 
a/crates/dst/src/workload/table_ops/properties.rs +++ b/crates/dst/src/workload/table_ops/properties.rs @@ -1,24 +1,78 @@ +use std::ops::Bound; + use serde::{Deserialize, Serialize}; use crate::schema::SimRow; use super::TableWorkloadInteraction; +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum PropertyBound { + Unbounded, + Included(SimRow), + Excluded(SimRow), +} + +impl PropertyBound { + pub fn to_range_bound(&self) -> Bound { + match self { + Self::Unbounded => Bound::Unbounded, + Self::Included(key) => Bound::Included(key.to_algebraic_value()), + Self::Excluded(key) => Bound::Excluded(key.to_algebraic_value()), + } + } +} + #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub enum TableProperty { - VisibleInConnection { conn: usize, table: usize, row: SimRow }, - MissingInConnection { conn: usize, table: usize, row: SimRow }, - VisibleFresh { table: usize, row: SimRow }, - MissingFresh { table: usize, row: SimRow }, - RowCountFresh { table: usize, expected: u64 }, - TablesMatchFresh { left: usize, right: usize }, + VisibleInConnection { + conn: usize, + table: usize, + row: SimRow, + }, + MissingInConnection { + conn: usize, + table: usize, + row: SimRow, + }, + VisibleFresh { + table: usize, + row: SimRow, + }, + MissingFresh { + table: usize, + row: SimRow, + }, + RowCountFresh { + table: usize, + expected: u64, + }, + RangeScanInConnection { + conn: usize, + table: usize, + cols: Vec, + lower: PropertyBound, + upper: PropertyBound, + expected_rows: Vec, + }, + RangeScanFresh { + table: usize, + cols: Vec, + lower: PropertyBound, + upper: PropertyBound, + expected_rows: Vec, + }, + TablesMatchFresh { + left: usize, + right: usize, + }, } -pub fn property_interaction(property: TableProperty) -> TableWorkloadInteraction { +pub(crate) fn property_interaction(property: TableProperty) -> TableWorkloadInteraction { TableWorkloadInteraction::Check(property) } -pub fn followup_properties_after_commit( +pub(crate) fn 
followup_properties_after_commit( scenario_commit_properties: Vec, inserts: Vec<(usize, SimRow)>, deletes: Vec<(usize, SimRow)>, diff --git a/crates/dst/src/workload/table_ops/scenarios/banking.rs b/crates/dst/src/workload/table_ops/scenarios/banking.rs index a94804ea05e..ccf4274643b 100644 --- a/crates/dst/src/workload/table_ops/scenarios/banking.rs +++ b/crates/dst/src/workload/table_ops/scenarios/banking.rs @@ -23,7 +23,7 @@ pub fn generate_schema() -> SchemaPlan { ty: AlgebraicType::U64, }, ], - secondary_index_col: Some(1), + extra_indexes: vec![vec![1]], }, TablePlan { name: "credit_accounts".into(), @@ -37,7 +37,7 @@ pub fn generate_schema() -> SchemaPlan { ty: AlgebraicType::U64, }, ], - secondary_index_col: Some(1), + extra_indexes: vec![vec![1]], }, ], } diff --git a/crates/dst/src/workload/table_ops/scenarios/mod.rs b/crates/dst/src/workload/table_ops/scenarios/mod.rs index f7e3ea1698e..28cdcc6ed28 100644 --- a/crates/dst/src/workload/table_ops/scenarios/mod.rs +++ b/crates/dst/src/workload/table_ops/scenarios/mod.rs @@ -10,23 +10,23 @@ use super::{ }; #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub struct RandomCrudScenario; +pub(crate) struct RandomCrudScenario; #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub struct BankingScenario; +pub(crate) struct IndexedRangesScenario; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub(crate) struct BankingScenario; #[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] pub enum TableScenarioId { #[default] RandomCrud, + IndexedRanges, Banking, } impl TableScenario for RandomCrudScenario { - fn name(&self) -> &'static str { - "random-crud" - } - fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan { random_crud::generate_schema(rng) } @@ -45,10 +45,6 @@ impl TableScenario for RandomCrudScenario { } impl TableScenario for BankingScenario { - fn name(&self) -> &'static str { - "banking" - } - fn generate_schema(&self, _rng: &mut DstRng) -> SchemaPlan { 
banking::generate_schema() } @@ -68,17 +64,29 @@ impl TableScenario for BankingScenario { } } -impl TableScenario for TableScenarioId { - fn name(&self) -> &'static str { - match self { - Self::RandomCrud => RandomCrudScenario.name(), - Self::Banking => BankingScenario.name(), - } +impl TableScenario for IndexedRangesScenario { + fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan { + random_crud::generate_indexed_ranges_schema(rng) } + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + random_crud::validate_outcome(schema, outcome) + } + + fn commit_properties(&self) -> Vec { + Vec::new() + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { + random_crud::fill_pending_indexed_ranges(planner, conn); + } +} + +impl TableScenario for TableScenarioId { fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan { match self { Self::RandomCrud => RandomCrudScenario.generate_schema(rng), + Self::IndexedRanges => IndexedRangesScenario.generate_schema(rng), Self::Banking => BankingScenario.generate_schema(rng), } } @@ -86,6 +94,7 @@ impl TableScenario for TableScenarioId { fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { match self { Self::RandomCrud => RandomCrudScenario.validate_outcome(schema, outcome), + Self::IndexedRanges => IndexedRangesScenario.validate_outcome(schema, outcome), Self::Banking => BankingScenario.validate_outcome(schema, outcome), } } @@ -93,6 +102,7 @@ impl TableScenario for TableScenarioId { fn commit_properties(&self) -> Vec { match self { Self::RandomCrud => RandomCrudScenario.commit_properties(), + Self::IndexedRanges => IndexedRangesScenario.commit_properties(), Self::Banking => BankingScenario.commit_properties(), } } @@ -100,11 +110,8 @@ impl TableScenario for TableScenarioId { fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { match self { Self::RandomCrud => 
RandomCrudScenario.fill_pending(planner, conn), + Self::IndexedRanges => IndexedRangesScenario.fill_pending(planner, conn), Self::Banking => BankingScenario.fill_pending(planner, conn), } } } - -pub fn default_target_ops(rng: &mut DstRng) -> usize { - 24 + rng.index(24) -} diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs index 58aca87a72a..ccf4db91726 100644 --- a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -1,3 +1,5 @@ +use std::cmp::Ordering; + use spacetimedb_sats::AlgebraicType; use crate::{ @@ -7,31 +9,129 @@ use crate::{ use super::super::{ generation::ScenarioPlanner, - properties::{property_interaction, TableProperty}, + properties::{property_interaction, PropertyBound, TableProperty}, TableWorkloadOutcome, }; +#[derive(Clone, Copy)] +struct ScenarioTuning { + min_tables: usize, + table_count_choices: usize, + min_extra_cols: usize, + extra_col_choices: usize, + preferred_range_cols: usize, + prefer_range_compatible_pct: usize, + prefer_u64_pct: usize, + single_index_pct: usize, + composite2_index_pct: usize, + composite3_index_pct: usize, + range_probe_pct: usize, + in_tx_probe_pct: usize, + composite_probe_pct: usize, + insert_pct: usize, + begin_tx_pct: usize, + commit_tx_pct: usize, + rollback_tx_pct: usize, +} + +const RANDOM_CRUD_TUNING: ScenarioTuning = ScenarioTuning { + min_tables: 1, + table_count_choices: 3, + min_extra_cols: 1, + extra_col_choices: 4, + preferred_range_cols: 2, + prefer_range_compatible_pct: 65, + prefer_u64_pct: 75, + single_index_pct: 70, + composite2_index_pct: 65, + composite3_index_pct: 30, + range_probe_pct: 10, + in_tx_probe_pct: 60, + composite_probe_pct: 70, + insert_pct: 65, + begin_tx_pct: 20, + commit_tx_pct: 15, + rollback_tx_pct: 10, +}; + +const INDEXED_RANGES_TUNING: ScenarioTuning = ScenarioTuning { + min_tables: 1, + table_count_choices: 2, + 
min_extra_cols: 3, + extra_col_choices: 3, + preferred_range_cols: 3, + prefer_range_compatible_pct: 90, + prefer_u64_pct: 90, + single_index_pct: 100, + composite2_index_pct: 100, + composite3_index_pct: 75, + range_probe_pct: 45, + in_tx_probe_pct: 65, + composite_probe_pct: 90, + insert_pct: 55, + begin_tx_pct: 20, + commit_tx_pct: 15, + rollback_tx_pct: 8, +}; + pub fn generate_schema(rng: &mut DstRng) -> SchemaPlan { - let table_count = rng.index(3) + 1; + generate_schema_with_tuning(rng, RANDOM_CRUD_TUNING) +} + +pub fn generate_indexed_ranges_schema(rng: &mut DstRng) -> SchemaPlan { + generate_schema_with_tuning(rng, INDEXED_RANGES_TUNING) +} + +fn generate_schema_with_tuning(rng: &mut DstRng, tuning: ScenarioTuning) -> SchemaPlan { + let table_count = tuning.min_tables + rng.index(tuning.table_count_choices); let mut tables = Vec::with_capacity(table_count); for table_idx in 0..table_count { - let extra_cols = rng.index(3); + let extra_cols = tuning.min_extra_cols + rng.index(tuning.extra_col_choices); let mut columns = vec![ColumnPlan { name: "id".into(), ty: AlgebraicType::U64, }]; for col_idx in 0..extra_cols { + let ty = if col_idx < tuning.preferred_range_cols && rng.index(100) < tuning.prefer_range_compatible_pct { + if rng.index(100) < tuning.prefer_u64_pct { + AlgebraicType::U64 + } else { + AlgebraicType::Bool + } + } else { + generate_supported_type(rng) + }; columns.push(ColumnPlan { name: format!("c{table_idx}_{col_idx}"), - ty: generate_supported_type(rng), + ty, }); } - let secondary_index_col = (columns.len() > 1 && rng.index(100) < 50).then_some(1); + let mut extra_indexes = Vec::new(); + let non_primary_range_cols = columns + .iter() + .enumerate() + .skip(1) + .filter(|(_, col)| is_range_compatible(&col.ty)) + .map(|(idx, _)| idx as u16) + .collect::>(); + if let Some(&col) = non_primary_range_cols.first() + && rng.index(100) < tuning.single_index_pct + { + extra_indexes.push(vec![col]); + } + if non_primary_range_cols.len() >= 2 && 
rng.index(100) < tuning.composite2_index_pct { + extra_indexes.push(non_primary_range_cols[..2].to_vec()); + } + if non_primary_range_cols.len() >= 3 && rng.index(100) < tuning.composite3_index_pct { + extra_indexes.push(non_primary_range_cols[..3].to_vec()); + } + extra_indexes.sort(); + extra_indexes.dedup(); tables.push(TablePlan { name: format!("dst_table_{table_idx}_{}", rng.next_u64() % 10_000), columns, - secondary_index_col, + extra_indexes, }); } @@ -43,13 +143,25 @@ pub fn validate_outcome(_schema: &SchemaPlan, _outcome: &TableWorkloadOutcome) - } pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: usize) { - if planner.maybe_control_tx(conn, 20, 15, 10) { + fill_pending_with_tuning(planner, conn, RANDOM_CRUD_TUNING); +} + +pub fn fill_pending_indexed_ranges(planner: &mut ScenarioPlanner<'_>, conn: usize) { + fill_pending_with_tuning(planner, conn, INDEXED_RANGES_TUNING); +} + +fn fill_pending_with_tuning(planner: &mut ScenarioPlanner<'_>, conn: usize, tuning: ScenarioTuning) { + if planner.maybe_control_tx(conn, tuning.begin_tx_pct, tuning.commit_tx_pct, tuning.rollback_tx_pct) { return; } let table = planner.choose_table(); + if planner.roll_percent(tuning.range_probe_pct) && maybe_emit_range_probe(planner, conn, table, tuning) { + return; + } + let visible_rows = planner.visible_rows(conn, table); - let choose_insert = visible_rows.is_empty() || planner.roll_percent(65); + let choose_insert = visible_rows.is_empty() || planner.roll_percent(tuning.insert_pct); if choose_insert { let row = planner.make_row(table); planner.insert(conn, table, row.clone()); @@ -86,3 +198,129 @@ pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: usize) { planner.push_interaction(property_interaction(TableProperty::MissingFresh { table, row })); } } + +fn is_range_compatible(ty: &AlgebraicType) -> bool { + matches!(ty, AlgebraicType::U64 | AlgebraicType::Bool) +} + +fn maybe_emit_range_probe( + planner: &mut ScenarioPlanner<'_>, + conn: usize, + table: 
usize, + tuning: ScenarioTuning, +) -> bool { + let table_plan = planner.table_plan(table); + let mut probe_indexes = vec![vec![0]]; + probe_indexes.extend( + table_plan + .extra_indexes + .iter() + .filter(|cols| { + cols.iter() + .all(|&col| is_range_compatible(&table_plan.columns[col as usize].ty)) + }) + .cloned(), + ); + if probe_indexes.is_empty() { + return false; + } + + let use_connection_view = planner.in_tx(conn) && planner.roll_percent(tuning.in_tx_probe_pct); + let basis_rows = if use_connection_view { + planner.visible_rows(conn, table) + } else { + planner.committed_rows(table) + }; + if basis_rows.is_empty() { + return false; + } + + let composite_indexes = probe_indexes + .iter() + .filter(|cols| cols.len() > 1) + .cloned() + .collect::>(); + let cols = if !composite_indexes.is_empty() && planner.roll_percent(tuning.composite_probe_pct) { + composite_indexes[planner.choose_index(composite_indexes.len())].clone() + } else { + probe_indexes[planner.choose_index(probe_indexes.len())].clone() + }; + + let lower = choose_bound(planner, &basis_rows, &cols); + let upper = choose_bound(planner, &basis_rows, &cols); + let (lower, upper) = normalize_bounds(lower, upper); + let mut expected_rows = basis_rows + .into_iter() + .filter(|row| key_in_bounds(&row.project_key(&cols).to_algebraic_value(), &lower, &upper)) + .collect::>(); + expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, &cols)); + + let property = if use_connection_view { + TableProperty::RangeScanInConnection { + conn, + table, + cols, + lower, + upper, + expected_rows, + } + } else { + TableProperty::RangeScanFresh { + table, + cols, + lower, + upper, + expected_rows, + } + }; + planner.push_interaction(property_interaction(property)); + true +} + +fn choose_bound(planner: &mut ScenarioPlanner<'_>, rows: &[crate::schema::SimRow], cols: &[u16]) -> PropertyBound { + if planner.roll_percent(20) { + return PropertyBound::Unbounded; + } + let row = 
&rows[planner.choose_index(rows.len())]; + let key = row.project_key(cols); + if planner.roll_percent(50) { + PropertyBound::Included(key) + } else { + PropertyBound::Excluded(key) + } +} + +fn normalize_bounds(lower: PropertyBound, upper: PropertyBound) -> (PropertyBound, PropertyBound) { + match (bound_key(&lower), bound_key(&upper)) { + (Some(left), Some(right)) if left > right => (upper, lower), + _ => (lower, upper), + } +} + +fn bound_key(bound: &PropertyBound) -> Option { + match bound { + PropertyBound::Unbounded => None, + PropertyBound::Included(key) | PropertyBound::Excluded(key) => Some(key.to_algebraic_value()), + } +} + +fn key_in_bounds(key: &spacetimedb_sats::AlgebraicValue, lower: &PropertyBound, upper: &PropertyBound) -> bool { + let lower_ok = match lower { + PropertyBound::Unbounded => true, + PropertyBound::Included(bound) => key >= &bound.to_algebraic_value(), + PropertyBound::Excluded(bound) => key > &bound.to_algebraic_value(), + }; + let upper_ok = match upper { + PropertyBound::Unbounded => true, + PropertyBound::Included(bound) => key <= &bound.to_algebraic_value(), + PropertyBound::Excluded(bound) => key < &bound.to_algebraic_value(), + }; + lower_ok && upper_ok +} + +fn compare_rows_by_cols(lhs: &crate::schema::SimRow, rhs: &crate::schema::SimRow, cols: &[u16]) -> Ordering { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) +} diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index 20f731fc8cc..e9ffb65c438 100644 --- a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -11,8 +11,7 @@ use super::{generation::ScenarioPlanner, properties::TableProperty, scenarios::T /// /// A scenario supplies the initial schema, scenario-specific commit-time /// properties, and any final invariant over the collected outcome. 
-pub trait TableScenario: Clone { - fn name(&self) -> &'static str; +pub(crate) trait TableScenario: Clone { fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan; fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()>; fn commit_properties(&self) -> Vec; @@ -25,11 +24,11 @@ pub struct TableWorkloadCase { /// Seed used to derive schema and workload decisions. pub seed: crate::seed::DstSeed, /// Shared workload scenario identifier. - pub scenario: TableScenarioId, + pub(crate) scenario: TableScenarioId, /// Number of simulated client connections in the run. - pub num_connections: usize, + pub(crate) num_connections: usize, /// Initial schema installed into target before replaying interactions. - pub schema: SchemaPlan, + pub(crate) schema: SchemaPlan, /// Materialized interaction trace for replay and shrinking. pub interactions: Vec, } @@ -45,12 +44,6 @@ pub enum TableWorkloadInteraction { Check(TableProperty), } -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub enum TableWorkloadEvent { - /// One interaction executed successfully. - Executed(TableWorkloadInteraction), -} - /// Final state gathered from a table-workload engine after execution ends. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct TableWorkloadOutcome { @@ -68,18 +61,18 @@ pub struct TableWorkloadExecutionFailure { /// Target-provided error message. pub reason: String, /// Interaction that triggered the failure. - pub interaction: TableWorkloadInteraction, + pub(crate) interaction: TableWorkloadInteraction, } /// Minimal engine interface implemented by concrete table-oriented targets. -pub trait TableWorkloadEngine { +pub(crate) trait TableWorkloadEngine { fn execute(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), String>; fn collect_outcome(&mut self) -> anyhow::Result; fn finish(&mut self); } /// Per-connection write transaction bookkeeping shared by locking targets. 
-pub struct ConnectionWriteState { +pub(crate) struct ConnectionWriteState { /// Open mutable transaction handle for each simulated connection. pub tx_by_connection: Vec>, /// Connection that currently owns the single-writer lock, if any. From 749b1184e20fa4f66025894aa497d30cd2b4d1f9 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 23 Apr 2026 00:51:36 +0530 Subject: [PATCH 09/74] target owned properties --- crates/dst/README.md | 194 ++++++++++- crates/dst/src/targets/datastore.rs | 288 +++++++--------- crates/dst/src/targets/mod.rs | 1 + crates/dst/src/targets/properties.rs | 319 ++++++++++++++++++ crates/dst/src/targets/relational_db.rs | 305 +++++++---------- .../dst/src/workload/table_ops/generation.rs | 53 +-- crates/dst/src/workload/table_ops/mod.rs | 3 - crates/dst/src/workload/table_ops/model.rs | 49 +-- .../dst/src/workload/table_ops/properties.rs | 89 ----- crates/dst/src/workload/table_ops/runner.rs | 11 +- .../workload/table_ops/scenarios/banking.rs | 54 +-- .../src/workload/table_ops/scenarios/mod.rs | 26 +- .../table_ops/scenarios/random_crud.rs | 172 +--------- crates/dst/src/workload/table_ops/types.rs | 6 +- 14 files changed, 788 insertions(+), 782 deletions(-) create mode 100644 crates/dst/src/targets/properties.rs delete mode 100644 crates/dst/src/workload/table_ops/properties.rs diff --git a/crates/dst/README.md b/crates/dst/README.md index 16092901f61..768bf7fea21 100644 --- a/crates/dst/README.md +++ b/crates/dst/README.md @@ -2,6 +2,31 @@ Deterministic simulation testing utilities for SpacetimeDB. +## DST In A Nutshell + +Current DST is a CLI-driven simulator pipeline: + +1. the CLI picks a `target`, `scenario`, seed, and run budget +2. the workload generator produces a deterministic stream or materialized case +3. the target installs schema and executes interactions against a real engine +4. properties are checked during execution and against the final outcome +5. 
on failure, the saved case can be replayed and shrunk from CLI + +Today the main shared workload family is `workload/table_ops/`. +It is good for targets that behave like transactional tables: + +- schema generation +- inserts / deletes +- transaction begin / commit / rollback +- range scans and visibility checks +- scenario-specific properties such as `banking` + +The important split is: + +- workload code decides what to try +- target code decides how to execute it on a concrete engine +- properties decide whether the observed behavior is valid + ## What Is In This Crate This crate contains reusable pieces for building deterministic simulations, @@ -52,21 +77,38 @@ That separation is intentional: The main reusable DST workload now lives in `workload/table_ops/`: 1. `types.rs` - common scenario, interaction, event, outcome, and engine traits -2. `properties.rs` - first-class properties such as visibility, row-count, and banking table - matching -3. `scenarios/` - scenario-specific schema generation like `random_crud` and `banking` -4. `model.rs` + common scenario, interaction, outcome, and engine traits +2. `scenarios/` + scenario-specific schema generation like `random_crud`, `indexed_ranges`, + and `banking` +3. `model.rs` generator model and expected-state model -5. `generation.rs` +4. `generation.rs` `InteractionStream` and scenario-aware workload planning -6. `runner.rs` +5. `runner.rs` generic execute/run helpers shared by multiple targets Concrete targets like `targets/datastore.rs` and `targets/relational_db.rs` -reuse that workload and swap in target-specific engines. +reuse that workload and swap in target-specific engines and target-owned +properties. + +## Property Ownership + +Properties are now owned by targets, not by `workload/table_ops`. + +- workload emits only operations (`BeginTx`, `CommitTx`, `Insert`, `Delete`, ...) 
+- target execution code decides which properties to evaluate and when +- failure messages are tagged by property family for easier triage + +Current target-side property families include: + +- `PQS::InsertSelect` +- `PQS::IndexRangeExcluded` (composite index range behavior) +- `NoREC::SelectSelectOptimizer` +- `TLP::WhereTrueFalseNull` +- `TLP::UNIONAllPreservesCardinality` +- `DeleteSelect` +- shadow-style table consistency checks (for banking-like mirrored tables) ## Failure Flow @@ -94,6 +136,138 @@ DST workloads are run from CLI only. Use `random-crud` for broad coverage and `indexed-ranges` when you want to bias toward secondary/composite index range behavior without hardcoding a single historical bug. +## How To Add More Targets + +There are two extension patterns. + +### 1. Reuse `table_ops` + +Use this when the new engine still looks like a transactional table store. +Examples: + +- another datastore wrapper +- another relational layer +- a storage engine exposing the same table semantics through a different API + +In that case: + +1. add `targets/.rs` +2. reuse `TableWorkloadCase` and `TableScenarioId` +3. implement the target-specific engine bootstrap and row operations +4. expose the same CLI-facing functions used by `main.rs` + - `materialize_case` + - `run_case_detailed` + - `run_generated_with_config_and_scenario` + - `save_case` + - `load_case` + - `shrink_failure` +5. add the target to the CLI `TargetKind` + +This is the path `datastore` and `relational_db` use today. + +### 2. Add A New Workload Family + +Use this when the thing being tested is not naturally “tables plus tx”. +Examples: + +- commitlog replay +- crash / reopen / durability +- replication +- network partitions +- leader election + +Do not force those into `table_ops`. 
+ +Instead, add a new workload family under `workload/`, for example: + +- `workload/commitlog_ops/` +- `workload/replication_ops/` + +That workload family should define its own: + +- case type +- interaction enum +- outcome type +- properties / invariants +- generator / stream planner +- runner helpers + +Then add a target that executes that workload against the real implementation. + +## Adding Commitlog Replay + +Commitlog replay should be a new workload family, not another `table_ops` +scenario. + +Good interaction examples: + +- `Append` +- `Flush` +- `Fsync` +- `Crash` +- `Reopen` +- `Replay` +- `CheckDurablePrefix` +- `CheckReplayedState` + +Good properties: + +- replay restores the same durable prefix +- non-durable suffix is not reported as committed after reopen +- replay is deterministic for the same saved case +- snapshot plus replay matches replay-only, if snapshots exist + +Suggested layout: + +- `workload/commitlog_ops/` +- `targets/commitlog.rs` + +If replay is exercised through `RelationalDB`, then use: + +- `workload/commitlog_ops/` +- `targets/relational_db_lifecycle.rs` + +But keep the workload family separate from `table_ops`. + +## Adding Replication + +Replication also should be its own workload family. + +Good interaction examples: + +- `ClientWrite` +- `Replicate` +- `DropMessage` +- `Partition` +- `HealPartition` +- `CrashReplica` +- `RestartReplica` +- `ElectLeader` +- `CheckReplicaState` + +Good properties: + +- committed prefix agreement +- no committed entry lost after restart +- followers do not apply invalid orderings +- replicas converge after heal +- read guarantees match the configured consistency level + +Suggested layout: + +- `workload/replication_ops/` +- `targets/replication.rs` + +This target will likely need a composed cluster fixture rather than the +single-engine shape used by current table targets. + +## Rule Of Thumb + +- If the test subject is “a DB that executes table operations”, reuse + `table_ops`. 
+- If the test subject is “a system with lifecycle, log, or network events”, + make a new workload family. + ## Current Scope This crate provides shared table workload generation, two concrete targets diff --git a/crates/dst/src/targets/datastore.rs b/crates/dst/src/targets/datastore.rs index 6b3ea2c703c..6dc6d6d831d 100644 --- a/crates/dst/src/targets/datastore.rs +++ b/crates/dst/src/targets/datastore.rs @@ -1,6 +1,6 @@ //! Randomized datastore simulator target built on the shared table workload. -use std::path::Path; +use std::{ops::Bound, path::Path}; use spacetimedb_datastore::{ execution_context::Workload, @@ -25,10 +25,13 @@ use crate::{ config::RunConfig, schema::{SchemaPlan, SimRow}, seed::DstSeed, - targets::harness::{self, TableTargetHarness}, + targets::{ + harness::{self, TableTargetHarness}, + properties::{self, TargetPropertyAccess, TargetPropertyState}, + }, workload::table_ops::{ - ConnectionWriteState, PropertyBound, TableProperty, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, - TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, + ConnectionWriteState, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, TableWorkloadExecutionFailure, + TableWorkloadInteraction, TableWorkloadOutcome, }, }; @@ -86,9 +89,12 @@ pub fn shrink_failure( /// Concrete datastore execution harness for the shared table workload. 
struct DatastoreEngine { + schema: SchemaPlan, datastore: Locking, table_ids: Vec, execution: ConnectionWriteState, + properties: TargetPropertyState, + step: u64, } impl DatastoreEngine { @@ -96,9 +102,12 @@ impl DatastoreEngine { let datastore = bootstrap_datastore()?; let table_ids = install_schema(&datastore, schema)?; Ok(Self { + schema: schema.clone(), datastore, table_ids, execution: ConnectionWriteState::new(num_connections), + properties: TargetPropertyState::default(), + step: 0, }) } @@ -163,13 +172,11 @@ impl DatastoreEngine { &self, table_id: TableId, cols: &[u16], - lower: &PropertyBound, - upper: &PropertyBound, + lower: Bound, + upper: Bound, ) -> anyhow::Result> { let tx = self.datastore.begin_tx(Workload::ForTests); let cols = cols.iter().copied().collect::(); - let lower = lower.to_range_bound(); - let upper = upper.to_range_bound(); let rows = self .datastore .iter_by_col_range_tx(&tx, table_id, cols, (lower, upper))? @@ -178,28 +185,101 @@ impl DatastoreEngine { Ok(rows) } - fn in_tx_range_scan( + fn table_id(&self, table: usize) -> Result { + self.table_ids + .get(table) + .copied() + .ok_or_else(|| format!("table {table} out of range")) + } + + fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String> { + let table_id = self.table_id(table)?; + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { + Ok(self + .datastore + .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("in-tx lookup failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next()) + } else { + self.fresh_lookup(table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}")) + } + } + + fn count_rows_for_property(&self, table: usize) -> Result { + let table_id = self.table_id(table)?; + let tx = self.datastore.begin_tx(Workload::ForTests); + Ok(tx.row_count(table_id) as usize) + } + + fn count_by_col_eq_for_property(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { + let table_id = self.table_id(table)?; + let tx = self.datastore.begin_tx(Workload::ForTests); + self.datastore + .iter_by_col_eq_tx(&tx, table_id, col, value) + .map(|rows| rows.count()) + .map_err(|err| format!("predicate query failed: {err}")) + } + + fn range_scan_for_property( &self, - tx: &MutTxId, - table_id: TableId, + table: usize, cols: &[u16], - lower: &PropertyBound, - upper: &PropertyBound, - ) -> anyhow::Result> { - let cols = cols.iter().copied().collect::(); - let lower = lower.to_range_bound(); - let upper = upper.to_range_bound(); - let rows = self - .datastore - .iter_by_col_range_mut_tx(tx, table_id, cols, (lower, upper))? 
- .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect(); - Ok(rows) + lower: Bound, + upper: Bound, + ) -> Result, String> { + let table_id = self.table_id(table)?; + self.fresh_range_scan(table_id, cols, lower, upper) + .map_err(|err| format!("range scan failed: {err}")) + } + + fn with_property_state( + &mut self, + f: impl FnOnce(&TargetPropertyState, &Self) -> Result, + ) -> Result { + let state = std::mem::take(&mut self.properties); + let result = f(&state, self); + self.properties = state; + result + } +} + +impl TargetPropertyAccess for DatastoreEngine { + fn schema_plan(&self) -> &SchemaPlan { + &self.schema + } + + fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String> { + Self::lookup_in_connection(self, conn, table, id) + } + + fn collect_rows_for_table(&self, table: usize) -> Result, String> { + Self::collect_rows_for_table(self, table).map_err(|err| format!("collect rows failed: {err}")) + } + + fn count_rows(&self, table: usize) -> Result { + Self::count_rows_for_property(self, table) + } + + fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { + Self::count_by_col_eq_for_property(self, table, col, value) + } + + fn range_scan( + &self, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String> { + Self::range_scan_for_property(self, table, cols, lower, upper) } } impl TableWorkloadEngine for DatastoreEngine { fn execute(&mut self, interaction: &Interaction) -> Result<(), String> { + self.step = self.step.saturating_add(1); match interaction { Interaction::BeginTx { conn } => { self.execution.ensure_known_connection(*conn)?; @@ -226,6 +306,7 @@ impl TableWorkloadEngine for DatastoreEngine { .commit_mut_tx(tx) .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; self.execution.active_writer = None; + self.with_property_state(|state, access| properties::on_commit_or_rollback(state, access))?; } 
Interaction::RollbackTx { conn } => { self.execution.ensure_writer_owner(*conn, "rollback")?; @@ -234,8 +315,10 @@ impl TableWorkloadEngine for DatastoreEngine { .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; let _ = self.datastore.rollback_mut_tx(tx); self.execution.active_writer = None; + self.with_property_state(|state, access| properties::on_commit_or_rollback(state, access))?; } Interaction::Insert { conn, table, row } => { + let in_tx = self.execution.tx_by_connection[*conn].is_some(); self.with_mut_tx(*conn, *table, |datastore, table_id, tx| { let bsatn = row.to_bsatn().map_err(|err: anyhow::Error| err.to_string())?; datastore @@ -243,8 +326,13 @@ impl TableWorkloadEngine for DatastoreEngine { .map_err(|err| format!("insert failed: {err}"))?; Ok(()) })?; + let step = self.step; + self.with_property_state(|state, access| { + properties::on_insert(state, access, step, *conn, *table, row, in_tx) + })?; } Interaction::Delete { conn, table, row } => { + let in_tx = self.execution.tx_by_connection[*conn].is_some(); self.with_mut_tx(*conn, *table, |datastore, table_id, tx| { let deleted = datastore.delete_by_rel_mut_tx(tx, table_id, [row.to_product_value()]); if deleted != 1 { @@ -252,149 +340,10 @@ impl TableWorkloadEngine for DatastoreEngine { } Ok(()) })?; - } - Interaction::Check(TableProperty::VisibleInConnection { conn, table, row }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { - self.datastore - .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) - .map_err(|err| format!("in-tx lookup failed: {err}"))? 
- .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .any(|candidate| candidate == *row) - } else { - self.fresh_lookup(table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}"))? - == Some(row.clone()) - }; - if !found { - return Err(format!("row not visible in connection after write: {row:?}")); - } - } - Interaction::Check(TableProperty::MissingInConnection { conn, table, row }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { - self.datastore - .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) - .map_err(|err| format!("in-tx lookup failed: {err}"))? - .next() - .is_some() - } else { - self.fresh_lookup(table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}"))? - .is_some() - }; - if found { - return Err(format!("row still visible in connection after delete: {row:?}")); - } - } - Interaction::Check(TableProperty::VisibleFresh { table, row }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = self - .fresh_lookup(table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}"))?; - if found != Some(row.clone()) { - return Err(format!("fresh lookup mismatch: expected={row:?} actual={found:?}")); - } - } - Interaction::Check(TableProperty::MissingFresh { table, row }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - if self - .fresh_lookup(table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}"))? 
- .is_some() - { - return Err(format!("fresh lookup still found deleted row: {row:?}")); - } - } - Interaction::Check(TableProperty::RowCountFresh { table, expected }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let actual = self.datastore.begin_tx(Workload::ForTests).row_count(table_id); - if actual != *expected { - return Err(format!("row count mismatch: expected={expected} actual={actual}")); - } - } - Interaction::Check(TableProperty::RangeScanInConnection { - conn, - table, - cols, - lower, - upper, - expected_rows, - }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let mut actual_rows = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { - self.in_tx_range_scan(tx, table_id, cols, lower, upper) - .map_err(|err| format!("in-tx range scan failed: {err}"))? - } else { - self.fresh_range_scan(table_id, cols, lower, upper) - .map_err(|err| format!("fresh range scan failed: {err}"))? 
- }; - actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - let mut expected_rows = expected_rows.clone(); - expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - if actual_rows != expected_rows { - return Err(format!( - "connection range scan mismatch on table {table}, cols={cols:?}: expected={expected_rows:?} actual={actual_rows:?}" - )); - } - } - Interaction::Check(TableProperty::RangeScanFresh { - table, - cols, - lower, - upper, - expected_rows, - }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let mut actual_rows = self - .fresh_range_scan(table_id, cols, lower, upper) - .map_err(|err| format!("fresh range scan failed: {err}"))?; - actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - let mut expected_rows = expected_rows.clone(); - expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - if actual_rows != expected_rows { - return Err(format!( - "fresh range scan mismatch on table {table}, cols={cols:?}: expected={expected_rows:?} actual={actual_rows:?}" - )); - } - } - Interaction::Check(TableProperty::TablesMatchFresh { left, right }) => { - let left_rows = self - .collect_rows_for_table(*left) - .map_err(|err| format!("left table collect failed: {err}"))?; - let right_rows = self - .collect_rows_for_table(*right) - .map_err(|err| format!("right table collect failed: {err}"))?; - if left_rows != right_rows { - return Err(format!( - "fresh table mismatch: left_table={left} right_table={right} left={left_rows:?} right={right_rows:?}" - )); - } + let step = self.step; + self.with_property_state(|state, access| { + properties::on_delete(state, access, step, *conn, *table, row, in_tx) + })?; } } @@ -488,10 +437,3 @@ fn install_schema(datastore: &Locking, schema: &SchemaPlan) -> anyhow::Result std::cmp::Ordering { - lhs.project_key(cols) - .to_algebraic_value() - .cmp(&rhs.project_key(cols).to_algebraic_value()) - 
.then_with(|| lhs.values.cmp(&rhs.values)) -} diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index 449f06517d0..2ae5a0e221a 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -2,4 +2,5 @@ pub mod datastore; pub(crate) mod harness; +pub(crate) mod properties; pub mod relational_db; diff --git a/crates/dst/src/targets/properties.rs b/crates/dst/src/targets/properties.rs new file mode 100644 index 00000000000..5bf16bda63b --- /dev/null +++ b/crates/dst/src/targets/properties.rs @@ -0,0 +1,319 @@ +//! Target-level property runtime shared by datastore-oriented targets. +//! +//! Properties are owned by targets (not workload generation). This keeps workloads as pure +//! operation streams and lets each target decide when and how to validate invariants. + +use std::ops::Bound; + +use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; + +use crate::schema::{SchemaPlan, SimRow}; + +/// Property types supported by target execution. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum TargetProperty { + InsertSelect, + DeleteSelect, + SelectSelectOptimizer, + WhereTrueFalseNull, + IndexRangeExcluded, + BankingTablesMatch, +} + +/// Target adapter for property evaluation. +pub(crate) trait TargetPropertyAccess { + fn schema_plan(&self) -> &SchemaPlan; + fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String>; + fn collect_rows_for_table(&self, table: usize) -> Result, String>; + fn count_rows(&self, table: usize) -> Result; + fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result; + fn range_scan( + &self, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String>; +} + +/// Mutable runtime state for target-owned properties. +/// +/// This is intentionally small today, but it is the anchor for adding stateful +/// properties later (history windows, cross-step state, learned predicates, etc). 
+#[derive(Debug, Clone)] +pub(crate) struct TargetPropertyState { + periodic_every: u64, + enabled: Vec, +} + +impl Default for TargetPropertyState { + fn default() -> Self { + Self { + periodic_every: 8, + enabled: vec![ + TargetProperty::InsertSelect, + TargetProperty::DeleteSelect, + TargetProperty::SelectSelectOptimizer, + TargetProperty::WhereTrueFalseNull, + TargetProperty::IndexRangeExcluded, + TargetProperty::BankingTablesMatch, + ], + } + } +} + +impl TargetPropertyState { + fn enabled(&self, property: TargetProperty) -> bool { + self.enabled.contains(&property) + } +} + +pub(crate) fn on_insert( + state: &TargetPropertyState, + access: &A, + step: u64, + conn: usize, + table: usize, + row: &SimRow, + in_tx: bool, +) -> Result<(), String> { + if state.enabled(TargetProperty::InsertSelect) { + check_insert_select(access, conn, table, row)?; + } + if !in_tx { + maybe_run_periodic(state, access, step, table)?; + if state.enabled(TargetProperty::BankingTablesMatch) { + check_banking_tables_match(access)?; + } + } + Ok(()) +} + +pub(crate) fn on_delete( + state: &TargetPropertyState, + access: &A, + step: u64, + conn: usize, + table: usize, + row: &SimRow, + in_tx: bool, +) -> Result<(), String> { + if state.enabled(TargetProperty::DeleteSelect) { + check_delete_select(access, conn, table, row)?; + } + if !in_tx { + maybe_run_periodic(state, access, step, table)?; + if state.enabled(TargetProperty::BankingTablesMatch) { + check_banking_tables_match(access)?; + } + } + Ok(()) +} + +pub(crate) fn on_commit_or_rollback( + state: &TargetPropertyState, + access: &A, +) -> Result<(), String> { + if state.enabled(TargetProperty::BankingTablesMatch) { + check_banking_tables_match(access)?; + } + Ok(()) +} + +fn maybe_run_periodic( + state: &TargetPropertyState, + access: &A, + step: u64, + table: usize, +) -> Result<(), String> { + if state.periodic_every == 0 || !step.is_multiple_of(state.periodic_every) { + return Ok(()); + } + if 
state.enabled(TargetProperty::SelectSelectOptimizer) { + check_norec_select_select_optimizer(access, table)?; + } + if state.enabled(TargetProperty::WhereTrueFalseNull) { + check_tlp_partitions(access, table)?; + } + if state.enabled(TargetProperty::IndexRangeExcluded) { + check_index_range_excluded(access, table)?; + } + Ok(()) +} + +fn check_insert_select( + access: &A, + conn: usize, + table: usize, + row: &SimRow, +) -> Result<(), String> { + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = access.lookup_in_connection(conn, table, id)?; + if found != Some(row.clone()) { + return Err(format!( + "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={row:?}, actual={found:?}" + )); + } + Ok(()) +} + +fn check_delete_select( + access: &A, + conn: usize, + table: usize, + row: &SimRow, +) -> Result<(), String> { + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + if access.lookup_in_connection(conn, table, id)?.is_some() { + return Err(format!( + "[DeleteSelect] row still visible after delete on conn={conn}, table={table}, row={row:?}" + )); + } + Ok(()) +} + +fn check_norec_select_select_optimizer(access: &A, table: usize) -> Result<(), String> { + let table_plan = access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some((col_idx, col_ty)) = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool | AlgebraicType::U64)) + .map(|(idx, col)| (idx as u16, &col.ty)) + else { + return Ok(()); + }; + + let scanned_rows = access.collect_rows_for_table(table)?; + if scanned_rows.is_empty() { + return Ok(()); + } + + let predicate_value = match col_ty { + AlgebraicType::Bool => AlgebraicValue::Bool(true), + AlgebraicType::U64 => scanned_rows[0].values[col_idx as usize].clone(), + _ => return Ok(()), + }; + let where_count = access.count_by_col_eq(table, col_idx, 
&predicate_value)?; + let projected_true_count = scanned_rows + .iter() + .filter(|row| row.values[col_idx as usize] == predicate_value) + .count(); + if where_count != projected_true_count { + return Err(format!( + "[NoREC::SelectSelectOptimizer] mismatch on table={table}, col={col_idx}: where_count={where_count}, projected_true={projected_true_count}" + )); + } + Ok(()) +} + +fn check_tlp_partitions(access: &A, table: usize) -> Result<(), String> { + let table_plan = access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some(col_idx) = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool)) + .map(|(idx, _)| idx as u16) + else { + return Ok(()); + }; + let total = access.count_rows(table)?; + let true_count = access.count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(true))?; + let false_count = access.count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(false))?; + let partition_sum = true_count + false_count; + if partition_sum != total { + return Err(format!( + "[TLP::WhereTrueFalseNull|TLP::UNIONAllPreservesCardinality] partition mismatch on table={table}, col={col_idx}: true={true_count}, false={false_count}, total={total}" + )); + } + Ok(()) +} + +fn check_index_range_excluded(access: &A, table: usize) -> Result<(), String> { + let table_plan = access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let rows = access.collect_rows_for_table(table)?; + if rows.len() < 2 { + return Ok(()); + } + + for cols in table_plan.extra_indexes.iter().filter(|cols| cols.len() > 1) { + if !cols.iter().all(|&col| { + matches!( + table_plan.columns[col as usize].ty, + AlgebraicType::U64 | AlgebraicType::Bool + ) + }) { + continue; + } + + let mut sorted_rows = rows.clone(); + sorted_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let lower_key = 
sorted_rows[0].project_key(cols).to_algebraic_value(); + let upper_key = sorted_rows[sorted_rows.len() - 1] + .project_key(cols) + .to_algebraic_value(); + let lower = Bound::Included(lower_key.clone()); + let upper = Bound::Excluded(upper_key.clone()); + + let mut expected_rows = sorted_rows + .into_iter() + .filter(|row| { + let key = row.project_key(cols).to_algebraic_value(); + key >= lower_key && key < upper_key + }) + .collect::>(); + expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let mut actual_rows = access.range_scan(table, cols, lower, upper)?; + actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + if actual_rows != expected_rows { + return Err(format!( + "[PQS::IndexRangeExcluded] range mismatch on table={table}, cols={cols:?}: expected={expected_rows:?}, actual={actual_rows:?}" + )); + } + } + + Ok(()) +} + +fn check_banking_tables_match(access: &A) -> Result<(), String> { + let schema = access.schema_plan(); + let debit = schema.tables.iter().position(|table| table.name == "debit_accounts"); + let credit = schema.tables.iter().position(|table| table.name == "credit_accounts"); + let (Some(left), Some(right)) = (debit, credit) else { + return Ok(()); + }; + + let left_rows = access.collect_rows_for_table(left)?; + let right_rows = access.collect_rows_for_table(right)?; + if left_rows != right_rows { + return Err(format!( + "[Shadow::AllTableHaveExpectedContent] banking mismatch: debit={left_rows:?}, credit={right_rows:?}" + )); + } + Ok(()) +} + +fn compare_rows_by_cols(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) +} diff --git a/crates/dst/src/targets/relational_db.rs b/crates/dst/src/targets/relational_db.rs index 0b494f6e54b..dff2a317b10 100644 --- a/crates/dst/src/targets/relational_db.rs +++ b/crates/dst/src/targets/relational_db.rs @@ 
-1,9 +1,9 @@ //! Basic RelationalDB simulator target using the shared table workload. -use std::path::Path; +use std::{ops::Bound, path::Path}; use spacetimedb_core::{ - db::relational_db::{MutTx as RelMutTx, RelationalDB, Tx as RelTx}, + db::relational_db::{MutTx as RelMutTx, RelationalDB}, messages::control_db::HostType, }; use spacetimedb_datastore::{ @@ -28,10 +28,13 @@ use crate::{ config::RunConfig, schema::{SchemaPlan, SimRow}, seed::DstSeed, - targets::harness::{self, TableTargetHarness}, + targets::{ + harness::{self, TableTargetHarness}, + properties::{self, TargetPropertyAccess, TargetPropertyState}, + }, workload::table_ops::{ - ConnectionWriteState, PropertyBound, TableProperty, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, - TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, + ConnectionWriteState, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, TableWorkloadExecutionFailure, + TableWorkloadInteraction, TableWorkloadOutcome, }, }; @@ -93,9 +96,12 @@ pub fn shrink_failure( /// Concrete `RelationalDB` execution harness for the shared table workload. 
struct RelationalDbEngine { + schema: SchemaPlan, db: RelationalDB, table_ids: Vec, execution: ConnectionWriteState, + properties: TargetPropertyState, + step: u64, } impl RelationalDbEngine { @@ -103,9 +109,12 @@ impl RelationalDbEngine { let db = bootstrap_relational_db()?; let table_ids = install_schema(&db, schema)?; Ok(Self { + schema: schema.clone(), db, table_ids, execution: ConnectionWriteState::new(num_connections), + properties: TargetPropertyState::default(), + step: 0, }) } @@ -173,13 +182,11 @@ impl RelationalDbEngine { &self, table_id: TableId, cols: &[u16], - lower: &PropertyBound, - upper: &PropertyBound, + lower: Bound, + upper: Bound, ) -> anyhow::Result> { let tx = self.db.begin_tx(Workload::ForTests); let cols = cols.iter().copied().collect::(); - let lower = lower.to_range_bound(); - let upper = upper.to_range_bound(); let rows = self .db .iter_by_col_range(&tx, table_id, cols, (lower, upper))? @@ -189,28 +196,110 @@ impl RelationalDbEngine { Ok(rows) } - fn in_tx_range_scan( + fn table_id(&self, table: usize) -> Result { + self.table_ids + .get(table) + .copied() + .ok_or_else(|| format!("table {table} out of range")) + } + + fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String> { + let table_id = self.table_id(table)?; + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { + Ok(self + .db + .iter_by_col_eq_mut(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("in-tx lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next()) + } else { + self.fresh_lookup(table_id, id) + .map_err(|err| format!("fresh lookup failed: {err}")) + } + } + + fn count_rows_for_property(&self, table: usize) -> Result { + let table_id = self.table_id(table)?; + let tx = self.db.begin_tx(Workload::ForTests); + let total = self + .db + .iter(&tx, table_id) + .map_err(|err| format!("scan failed: {err}"))? 
+ .count(); + let _ = self.db.release_tx(tx); + Ok(total) + } + + fn count_by_col_eq_for_property(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { + let table_id = self.table_id(table)?; + let tx = self.db.begin_tx(Workload::ForTests); + let total = self + .db + .iter_by_col_eq(&tx, table_id, col, value) + .map_err(|err| format!("predicate query failed: {err}"))? + .count(); + let _ = self.db.release_tx(tx); + Ok(total) + } + + fn range_scan_for_property( &self, - tx: &RelMutTx, - table_id: TableId, + table: usize, cols: &[u16], - lower: &PropertyBound, - upper: &PropertyBound, - ) -> anyhow::Result> { - let cols = cols.iter().copied().collect::(); - let lower = lower.to_range_bound(); - let upper = upper.to_range_bound(); - let rows = self - .db - .iter_by_col_range_mut(tx, table_id, cols, (lower, upper))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect(); - Ok(rows) + lower: Bound, + upper: Bound, + ) -> Result, String> { + let table_id = self.table_id(table)?; + self.fresh_range_scan(table_id, cols, lower, upper) + .map_err(|err| format!("range scan failed: {err}")) + } + + fn with_property_state( + &mut self, + f: impl FnOnce(&TargetPropertyState, &Self) -> Result, + ) -> Result { + let state = std::mem::take(&mut self.properties); + let result = f(&state, self); + self.properties = state; + result + } +} + +impl TargetPropertyAccess for RelationalDbEngine { + fn schema_plan(&self) -> &SchemaPlan { + &self.schema + } + + fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String> { + Self::lookup_in_connection(self, conn, table, id) + } + + fn collect_rows_for_table(&self, table: usize) -> Result, String> { + Self::collect_rows_for_table(self, table).map_err(|err| format!("collect rows failed: {err}")) + } + + fn count_rows(&self, table: usize) -> Result { + Self::count_rows_for_property(self, table) + } + + fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) 
-> Result { + Self::count_by_col_eq_for_property(self, table, col, value) + } + + fn range_scan( + &self, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String> { + Self::range_scan_for_property(self, table, cols, lower, upper) } } impl TableWorkloadEngine for RelationalDbEngine { fn execute(&mut self, interaction: &RelationalDbInteraction) -> Result<(), String> { + self.step = self.step.saturating_add(1); match interaction { RelationalDbInteraction::BeginTx { conn } => { self.execution.ensure_known_connection(*conn)?; @@ -235,6 +324,7 @@ impl TableWorkloadEngine for RelationalDbEngine { .commit_tx(tx) .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; self.execution.active_writer = None; + self.with_property_state(|state, access| properties::on_commit_or_rollback(state, access))?; } RelationalDbInteraction::RollbackTx { conn } => { self.execution.ensure_writer_owner(*conn, "rollback")?; @@ -243,16 +333,23 @@ impl TableWorkloadEngine for RelationalDbEngine { .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; let _ = self.db.rollback_mut_tx(tx); self.execution.active_writer = None; + self.with_property_state(|state, access| properties::on_commit_or_rollback(state, access))?; } RelationalDbInteraction::Insert { conn, table, row } => { + let in_tx = self.execution.tx_by_connection[*conn].is_some(); self.with_mut_tx(*conn, *table, |db, table_id, tx| { let bsatn = row.to_bsatn().map_err(|err: anyhow::Error| err.to_string())?; db.insert(tx, table_id, &bsatn) .map_err(|err| format!("insert failed: {err}"))?; Ok(()) })?; + let step = self.step; + self.with_property_state(|state, access| { + properties::on_insert(state, access, step, *conn, *table, row, in_tx) + })?; } RelationalDbInteraction::Delete { conn, table, row } => { + let in_tx = self.execution.tx_by_connection[*conn].is_some(); self.with_mut_tx(*conn, *table, |db, table_id, tx| { let deleted = db.delete_by_rel(tx, table_id, 
[row.to_product_value()]); if deleted != 1 { @@ -260,155 +357,10 @@ impl TableWorkloadEngine for RelationalDbEngine { } Ok(()) })?; - } - RelationalDbInteraction::Check(TableProperty::VisibleInConnection { conn, table, row }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { - self.db - .iter_by_col_eq_mut(tx, table_id, 0u16, &AlgebraicValue::U64(id)) - .map_err(|err| format!("in-tx lookup failed: {err}"))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .any(|candidate| candidate == *row) - } else { - self.fresh_lookup(table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}"))? - == Some(row.clone()) - }; - if !found { - return Err(format!("row not visible in connection after write: {row:?}")); - } - } - RelationalDbInteraction::Check(TableProperty::MissingInConnection { conn, table, row }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { - self.db - .iter_by_col_eq_mut(tx, table_id, 0u16, &AlgebraicValue::U64(id)) - .map_err(|err| format!("in-tx lookup failed: {err}"))? - .next() - .is_some() - } else { - self.fresh_lookup(table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}"))? 
- .is_some() - }; - if found { - return Err(format!("row still visible in connection after delete: {row:?}")); - } - } - RelationalDbInteraction::Check(TableProperty::VisibleFresh { table, row }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = self - .fresh_lookup(table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}"))?; - if found != Some(row.clone()) { - return Err(format!("fresh lookup mismatch: expected={row:?} actual={found:?}")); - } - } - RelationalDbInteraction::Check(TableProperty::MissingFresh { table, row }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - if self - .fresh_lookup(table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}"))? - .is_some() - { - return Err(format!("fresh lookup still found deleted row: {row:?}")); - } - } - RelationalDbInteraction::Check(TableProperty::RowCountFresh { table, expected }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let tx: RelTx = self.db.begin_tx(Workload::ForTests); - let actual = self - .db - .iter(&tx, table_id) - .map_err(|err| format!("row count scan failed: {err}"))? 
- .count() as u64; - let _ = self.db.release_tx(tx); - if actual != *expected { - return Err(format!("row count mismatch: expected={expected} actual={actual}")); - } - } - RelationalDbInteraction::Check(TableProperty::RangeScanInConnection { - conn, - table, - cols, - lower, - upper, - expected_rows, - }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let mut actual_rows = if let Some(Some(tx)) = self.execution.tx_by_connection.get(*conn) { - self.in_tx_range_scan(tx, table_id, cols, lower, upper) - .map_err(|err| format!("in-tx range scan failed: {err}"))? - } else { - self.fresh_range_scan(table_id, cols, lower, upper) - .map_err(|err| format!("fresh range scan failed: {err}"))? - }; - actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - let mut expected_rows = expected_rows.clone(); - expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - if actual_rows != expected_rows { - return Err(format!( - "connection range scan mismatch on table {table}, cols={cols:?}: expected={expected_rows:?} actual={actual_rows:?}" - )); - } - } - RelationalDbInteraction::Check(TableProperty::RangeScanFresh { - table, - cols, - lower, - upper, - expected_rows, - }) => { - let table_id = *self - .table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let mut actual_rows = self - .fresh_range_scan(table_id, cols, lower, upper) - .map_err(|err| format!("fresh range scan failed: {err}"))?; - actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - let mut expected_rows = expected_rows.clone(); - expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - if actual_rows != expected_rows { - return Err(format!( - "fresh range scan mismatch on table {table}, cols={cols:?}: expected={expected_rows:?} actual={actual_rows:?}" - )); - } - } - RelationalDbInteraction::Check(TableProperty::TablesMatchFresh { left, right }) => { - let 
left_rows = self - .collect_rows_for_table(*left) - .map_err(|err| format!("left table collect failed: {err}"))?; - let right_rows = self - .collect_rows_for_table(*right) - .map_err(|err| format!("right table collect failed: {err}"))?; - if left_rows != right_rows { - return Err(format!( - "fresh table mismatch: left_table={left} right_table={right} left={left_rows:?} right={right_rows:?}" - )); - } + let step = self.step; + self.with_property_state(|state, access| { + properties::on_delete(state, access, step, *conn, *table, row, in_tx) + })?; } } @@ -516,10 +468,3 @@ fn install_schema(db: &RelationalDB, schema: &SchemaPlan) -> anyhow::Result std::cmp::Ordering { - lhs.project_key(cols) - .to_algebraic_value() - .cmp(&rhs.project_key(cols).to_algebraic_value()) - .then_with(|| lhs.values.cmp(&rhs.values)) -} diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index f54dfe1eeec..aff08df65c3 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -1,7 +1,7 @@ use std::collections::VecDeque; use crate::{ - schema::{SchemaPlan, SimRow, TablePlan}, + schema::SchemaPlan, seed::{DstRng, DstSeed}, }; @@ -14,17 +14,29 @@ use super::{model::GenerationModel, TableScenario, TableWorkloadInteraction}; /// memory up front. #[derive(Clone, Debug)] pub struct InteractionStream { + // Deterministic source for all planner choices. rng: DstRng, + // Scenario-specific workload policy layered on top of the shared model. scenario: S, + // Generator-side expected state used to decide what interactions are legal. model: GenerationModel, num_connections: usize, + // Soft budget for scenario-generated interactions. Finish mode may emit a + // few extra commit/follow-up interactions to close open transactions. 
target_interactions: usize, emitted: usize, + // When the budget is exhausted, we walk connections in order and commit any + // still-open transaction so the stream ends in a clean state. finalize_conn: usize, + // Scenario code can enqueue a burst of interactions at once: for example a + // mutation followed by one or more property checks. pending: VecDeque, finished: bool, } +/// Narrow helper passed to scenario code so scenario-specific planning can +/// inspect the current model and enqueue interactions without owning the whole +/// stream state machine. pub struct ScenarioPlanner<'a> { rng: &'a mut DstRng, model: &'a mut GenerationModel, @@ -44,6 +56,10 @@ impl<'a> ScenarioPlanner<'a> { self.rng.index(100) < percent } + /// Tries to emit one transaction control interaction for `conn`. + /// + /// The shared generator owns transaction lifecycle so scenario code can + /// focus on domain operations like inserts, deletes, and range checks. pub fn maybe_control_tx(&mut self, conn: usize, begin_pct: usize, commit_pct: usize, rollback_pct: usize) -> bool { if !self.model.connections[conn].in_tx && self.model.active_writer().is_none() && self.roll_percent(begin_pct) { self.model.begin_tx(conn); @@ -52,16 +68,14 @@ impl<'a> ScenarioPlanner<'a> { } if self.model.connections[conn].in_tx && self.roll_percent(commit_pct) { - let followups = self.model.commit(conn); + self.model.commit(conn); self.pending.push_back(TableWorkloadInteraction::CommitTx { conn }); - self.pending.extend(followups); return true; } if self.model.connections[conn].in_tx && self.roll_percent(rollback_pct) { - let followups = self.model.rollback(conn); + self.model.rollback(conn); self.pending.push_back(TableWorkloadInteraction::RollbackTx { conn }); - self.pending.extend(followups); return true; } @@ -72,10 +86,6 @@ impl<'a> ScenarioPlanner<'a> { self.model.visible_rows(conn, table) } - pub fn committed_rows(&self, table: usize) -> Vec { - self.model.committed_rows(table) - } - pub fn make_row(&mut 
self, table: usize) -> crate::schema::SimRow { self.model.make_row(self.rng, table) } @@ -88,18 +98,6 @@ impl<'a> ScenarioPlanner<'a> { self.model.delete(conn, table, row); } - pub fn last_inserted_row(&self, conn: usize) -> Option { - self.model.last_inserted_row(conn) - } - - pub fn in_tx(&self, conn: usize) -> bool { - self.model.connections[conn].in_tx - } - - pub fn table_plan(&self, table: usize) -> &TablePlan { - &self.model.schema.tables[table] - } - pub fn push_interaction(&mut self, interaction: TableWorkloadInteraction) { self.pending.push_back(interaction); } @@ -113,11 +111,10 @@ impl InteractionStream { num_connections: usize, target_interactions: usize, ) -> Self { - let scenario_commit_properties = scenario.commit_properties(); Self { rng: seed.fork(17).rng(), scenario, - model: GenerationModel::new(&schema, num_connections, seed, scenario_commit_properties), + model: GenerationModel::new(&schema, num_connections, seed), num_connections, target_interactions, emitted: 0, @@ -133,13 +130,14 @@ impl InteractionStream { fn fill_pending(&mut self) { if self.emitted >= self.target_interactions { + // Once the workload budget is spent, stop asking the scenario for + // more work and only flush any open transaction state. while self.finalize_conn < self.num_connections { let conn = self.finalize_conn; self.finalize_conn += 1; if self.model.connections[conn].in_tx { - let followups = self.model.commit(conn); + self.model.commit(conn); self.pending.push_back(TableWorkloadInteraction::CommitTx { conn }); - self.pending.extend(followups); return; } } @@ -147,6 +145,9 @@ impl InteractionStream { return; } + // Locking targets allow only one writer at a time. If a writer is + // already open, keep driving that same connection until it commits or + // rolls back. Otherwise pick a fresh connection uniformly. 
let conn = self .model .active_writer() @@ -165,6 +166,8 @@ impl Iterator for InteractionStream { fn next(&mut self) -> Option { loop { + // Scenario planning fills `pending` in bursts, but the iterator + // surface stays one interaction at a time. if let Some(interaction) = self.pending.pop_front() { self.emitted += 1; return Some(interaction); diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs index d685cbe388b..c25cb02895e 100644 --- a/crates/dst/src/workload/table_ops/mod.rs +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -2,14 +2,11 @@ mod generation; mod model; -mod properties; mod runner; mod scenarios; mod types; pub(crate) use generation::InteractionStream; -pub(crate) use properties::{followup_properties_after_commit, property_interaction}; -pub use properties::{PropertyBound, TableProperty}; pub(crate) use runner::{execute_interactions, run_generated_with_engine}; pub use scenarios::TableScenarioId; pub(crate) use types::{ConnectionWriteState, TableScenario, TableWorkloadEngine}; diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs index e83668cbc63..206f15722ac 100644 --- a/crates/dst/src/workload/table_ops/model.rs +++ b/crates/dst/src/workload/table_ops/model.rs @@ -1,5 +1,3 @@ -use std::collections::BTreeSet; - use spacetimedb_sats::AlgebraicValue; use crate::{ @@ -7,7 +5,7 @@ use crate::{ seed::{DstRng, DstSeed}, }; -use super::{followup_properties_after_commit, property_interaction, TableProperty, TableWorkloadInteraction}; +use super::TableWorkloadInteraction; /// Generator-side model of committed rows plus per-connection pending writes. 
/// @@ -21,7 +19,6 @@ pub(crate) struct GenerationModel { committed: Vec>, next_ids: Vec, active_writer: Option, - scenario_commit_properties: Vec, } #[derive(Clone, Debug, Default)] @@ -29,16 +26,10 @@ pub(crate) struct PendingConnection { pub(crate) in_tx: bool, staged_inserts: Vec<(usize, SimRow)>, staged_deletes: Vec<(usize, SimRow)>, - last_auto_committed_insert: Option, } impl GenerationModel { - pub(crate) fn new( - schema: &SchemaPlan, - num_connections: usize, - seed: DstSeed, - scenario_commit_properties: Vec, - ) -> Self { + pub(crate) fn new(schema: &SchemaPlan, num_connections: usize, seed: DstSeed) -> Self { Self { schema: schema.clone(), connections: vec![PendingConnection::default(); num_connections], @@ -47,7 +38,6 @@ impl GenerationModel { .map(|idx| seed.fork(idx as u64 + 100).0) .collect(), active_writer: None, - scenario_commit_properties, } } @@ -78,10 +68,6 @@ impl GenerationModel { rows } - pub(crate) fn committed_rows(&self, table: usize) -> Vec { - self.committed[table].clone() - } - pub(crate) fn active_writer(&self) -> Option { self.active_writer } @@ -99,15 +85,10 @@ impl GenerationModel { if pending.in_tx { pending.staged_inserts.push((table, row)); } else { - self.committed[table].push(row.clone()); - pending.last_auto_committed_insert = Some(row); + self.committed[table].push(row); } } - pub(crate) fn last_inserted_row(&self, conn: usize) -> Option { - self.connections[conn].last_auto_committed_insert.clone() - } - pub(crate) fn delete(&mut self, conn: usize, table: usize, row: SimRow) { let pending = &mut self.connections[conn]; if pending.in_tx { @@ -120,7 +101,7 @@ impl GenerationModel { } } - pub(crate) fn commit(&mut self, conn: usize) -> Vec { + pub(crate) fn commit(&mut self, conn: usize) { let pending = &mut self.connections[conn]; let inserts = std::mem::take(&mut pending.staged_inserts); let deletes = std::mem::take(&mut pending.staged_deletes); @@ -133,33 +114,14 @@ impl GenerationModel { for (table, row) in &inserts { 
self.committed[*table].push(row.clone()); } - - followup_properties_after_commit(self.scenario_commit_properties.clone(), inserts, deletes) } - pub(crate) fn rollback(&mut self, conn: usize) -> Vec { + pub(crate) fn rollback(&mut self, conn: usize) { let pending = &mut self.connections[conn]; - let touched_tables = pending - .staged_inserts - .iter() - .chain(pending.staged_deletes.iter()) - .map(|(table, _)| *table) - .collect::>(); pending.staged_inserts.clear(); pending.staged_deletes.clear(); pending.in_tx = false; self.active_writer = None; - let mut followups = touched_tables - .into_iter() - .map(|table| { - property_interaction(TableProperty::RowCountFresh { - table, - expected: self.committed[table].len() as u64, - }) - }) - .collect::>(); - followups.extend(self.scenario_commit_properties.clone()); - followups } } @@ -244,7 +206,6 @@ impl ExpectedModel { self.committed[*table].retain(|candidate| *candidate != *row); } } - TableWorkloadInteraction::Check(_) => {} } } diff --git a/crates/dst/src/workload/table_ops/properties.rs b/crates/dst/src/workload/table_ops/properties.rs deleted file mode 100644 index 5aab032126b..00000000000 --- a/crates/dst/src/workload/table_ops/properties.rs +++ /dev/null @@ -1,89 +0,0 @@ -use std::ops::Bound; - -use serde::{Deserialize, Serialize}; - -use crate::schema::SimRow; - -use super::TableWorkloadInteraction; - -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub enum PropertyBound { - Unbounded, - Included(SimRow), - Excluded(SimRow), -} - -impl PropertyBound { - pub fn to_range_bound(&self) -> Bound { - match self { - Self::Unbounded => Bound::Unbounded, - Self::Included(key) => Bound::Included(key.to_algebraic_value()), - Self::Excluded(key) => Bound::Excluded(key.to_algebraic_value()), - } - } -} - -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub enum TableProperty { - VisibleInConnection { - conn: usize, - table: usize, - row: SimRow, - }, - MissingInConnection { - conn: usize, - 
table: usize, - row: SimRow, - }, - VisibleFresh { - table: usize, - row: SimRow, - }, - MissingFresh { - table: usize, - row: SimRow, - }, - RowCountFresh { - table: usize, - expected: u64, - }, - RangeScanInConnection { - conn: usize, - table: usize, - cols: Vec, - lower: PropertyBound, - upper: PropertyBound, - expected_rows: Vec, - }, - RangeScanFresh { - table: usize, - cols: Vec, - lower: PropertyBound, - upper: PropertyBound, - expected_rows: Vec, - }, - TablesMatchFresh { - left: usize, - right: usize, - }, -} - -pub(crate) fn property_interaction(property: TableProperty) -> TableWorkloadInteraction { - TableWorkloadInteraction::Check(property) -} - -pub(crate) fn followup_properties_after_commit( - scenario_commit_properties: Vec, - inserts: Vec<(usize, SimRow)>, - deletes: Vec<(usize, SimRow)>, -) -> Vec { - let mut followups = Vec::new(); - for (table, row) in inserts { - followups.push(property_interaction(TableProperty::VisibleFresh { table, row })); - } - for (table, row) in deletes { - followups.push(property_interaction(TableProperty::MissingFresh { table, row })); - } - followups.extend(scenario_commit_properties); - followups -} diff --git a/crates/dst/src/workload/table_ops/runner.rs b/crates/dst/src/workload/table_ops/runner.rs index a8b594e0a9d..d17846be351 100644 --- a/crates/dst/src/workload/table_ops/runner.rs +++ b/crates/dst/src/workload/table_ops/runner.rs @@ -3,8 +3,8 @@ use std::time::Instant; use crate::{config::RunConfig, schema::SchemaPlan, seed::DstSeed}; use super::{ - model::ExpectedModel, InteractionStream, TableProperty, TableScenario, TableWorkloadEngine, - TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome, + model::ExpectedModel, InteractionStream, TableScenario, TableWorkloadEngine, TableWorkloadExecutionFailure, + TableWorkloadInteraction, TableWorkloadOutcome, }; pub fn execute_interactions( @@ -29,7 +29,7 @@ where .map_err(|reason| TableWorkloadExecutionFailure { step_index, reason, - 
interaction: interaction.clone(), + interaction: Some(interaction.clone()), })?; expected.apply(&interaction); } @@ -110,9 +110,6 @@ fn failure_without_step(reason: String) -> TableWorkloadExecutionFailure { TableWorkloadExecutionFailure { step_index: usize::MAX, reason, - interaction: TableWorkloadInteraction::Check(TableProperty::RowCountFresh { - table: usize::MAX, - expected: 0, - }), + interaction: None, } } diff --git a/crates/dst/src/workload/table_ops/scenarios/banking.rs b/crates/dst/src/workload/table_ops/scenarios/banking.rs index ccf4274643b..9039fa3738f 100644 --- a/crates/dst/src/workload/table_ops/scenarios/banking.rs +++ b/crates/dst/src/workload/table_ops/scenarios/banking.rs @@ -2,11 +2,7 @@ use spacetimedb_sats::AlgebraicType; use crate::schema::{ColumnPlan, SchemaPlan, TablePlan}; -use super::super::{ - generation::ScenarioPlanner, - properties::{property_interaction, TableProperty}, - TableWorkloadInteraction, TableWorkloadOutcome, -}; +use super::super::{generation::ScenarioPlanner, TableWorkloadInteraction, TableWorkloadOutcome}; pub fn generate_schema() -> SchemaPlan { SchemaPlan { @@ -87,35 +83,11 @@ pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: usize) { table: 0, row: row.clone(), }); - planner.push_interaction(property_interaction(TableProperty::VisibleInConnection { - conn, - table: 0, - row: row.clone(), - })); planner.push_interaction(TableWorkloadInteraction::Insert { conn, table: 1, row: mirror.clone(), }); - planner.push_interaction(property_interaction(TableProperty::VisibleInConnection { - conn, - table: 1, - row: mirror.clone(), - })); - if !planner.in_tx(conn) { - planner.push_interaction(property_interaction(TableProperty::VisibleFresh { - table: 0, - row: row.clone(), - })); - planner.push_interaction(property_interaction(TableProperty::VisibleFresh { - table: 1, - row: mirror, - })); - planner.push_interaction(property_interaction(TableProperty::TablesMatchFresh { - left: 0, - right: 1, - })); - } return; } @@ 
-128,33 +100,9 @@ pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: usize) { table: 0, row: row.clone(), }); - planner.push_interaction(property_interaction(TableProperty::MissingInConnection { - conn, - table: 0, - row: row.clone(), - })); planner.push_interaction(TableWorkloadInteraction::Delete { conn, table: 1, row: mirror.clone(), }); - planner.push_interaction(property_interaction(TableProperty::MissingInConnection { - conn, - table: 1, - row: mirror.clone(), - })); - if !planner.in_tx(conn) { - planner.push_interaction(property_interaction(TableProperty::MissingFresh { - table: 0, - row: row.clone(), - })); - planner.push_interaction(property_interaction(TableProperty::MissingFresh { - table: 1, - row: mirror, - })); - planner.push_interaction(property_interaction(TableProperty::TablesMatchFresh { - left: 0, - right: 1, - })); - } } diff --git a/crates/dst/src/workload/table_ops/scenarios/mod.rs b/crates/dst/src/workload/table_ops/scenarios/mod.rs index 28cdcc6ed28..9ac7cab4f12 100644 --- a/crates/dst/src/workload/table_ops/scenarios/mod.rs +++ b/crates/dst/src/workload/table_ops/scenarios/mod.rs @@ -5,9 +5,7 @@ use serde::{Deserialize, Serialize}; use crate::{schema::SchemaPlan, seed::DstRng}; -use super::{ - generation::ScenarioPlanner, TableProperty, TableScenario, TableWorkloadInteraction, TableWorkloadOutcome, -}; +use super::{generation::ScenarioPlanner, TableScenario, TableWorkloadOutcome}; #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub(crate) struct RandomCrudScenario; @@ -35,10 +33,6 @@ impl TableScenario for RandomCrudScenario { random_crud::validate_outcome(schema, outcome) } - fn commit_properties(&self) -> Vec { - Vec::new() - } - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { random_crud::fill_pending(planner, conn); } @@ -53,12 +47,6 @@ impl TableScenario for BankingScenario { banking::validate_outcome(schema, outcome) } - fn commit_properties(&self) -> Vec { - 
vec![super::properties::property_interaction( - TableProperty::TablesMatchFresh { left: 0, right: 1 }, - )] - } - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { banking::fill_pending(planner, conn); } @@ -73,10 +61,6 @@ impl TableScenario for IndexedRangesScenario { random_crud::validate_outcome(schema, outcome) } - fn commit_properties(&self) -> Vec { - Vec::new() - } - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { random_crud::fill_pending_indexed_ranges(planner, conn); } @@ -99,14 +83,6 @@ impl TableScenario for TableScenarioId { } } - fn commit_properties(&self) -> Vec { - match self { - Self::RandomCrud => RandomCrudScenario.commit_properties(), - Self::IndexedRanges => IndexedRangesScenario.commit_properties(), - Self::Banking => BankingScenario.commit_properties(), - } - } - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { match self { Self::RandomCrud => RandomCrudScenario.fill_pending(planner, conn), diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs index ccf4db91726..4b103584f12 100644 --- a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -1,5 +1,3 @@ -use std::cmp::Ordering; - use spacetimedb_sats::AlgebraicType; use crate::{ @@ -7,11 +5,7 @@ use crate::{ seed::DstRng, }; -use super::super::{ - generation::ScenarioPlanner, - properties::{property_interaction, PropertyBound, TableProperty}, - TableWorkloadOutcome, -}; +use super::super::{generation::ScenarioPlanner, TableWorkloadInteraction, TableWorkloadOutcome}; #[derive(Clone, Copy)] struct ScenarioTuning { @@ -25,9 +19,6 @@ struct ScenarioTuning { single_index_pct: usize, composite2_index_pct: usize, composite3_index_pct: usize, - range_probe_pct: usize, - in_tx_probe_pct: usize, - composite_probe_pct: usize, insert_pct: usize, begin_tx_pct: usize, commit_tx_pct: 
usize, @@ -45,9 +36,6 @@ const RANDOM_CRUD_TUNING: ScenarioTuning = ScenarioTuning { single_index_pct: 70, composite2_index_pct: 65, composite3_index_pct: 30, - range_probe_pct: 10, - in_tx_probe_pct: 60, - composite_probe_pct: 70, insert_pct: 65, begin_tx_pct: 20, commit_tx_pct: 15, @@ -65,9 +53,6 @@ const INDEXED_RANGES_TUNING: ScenarioTuning = ScenarioTuning { single_index_pct: 100, composite2_index_pct: 100, composite3_index_pct: 75, - range_probe_pct: 45, - in_tx_probe_pct: 65, - composite_probe_pct: 90, insert_pct: 55, begin_tx_pct: 20, commit_tx_pct: 15, @@ -156,171 +141,20 @@ fn fill_pending_with_tuning(planner: &mut ScenarioPlanner<'_>, conn: usize, tuni } let table = planner.choose_table(); - if planner.roll_percent(tuning.range_probe_pct) && maybe_emit_range_probe(planner, conn, table, tuning) { - return; - } - let visible_rows = planner.visible_rows(conn, table); let choose_insert = visible_rows.is_empty() || planner.roll_percent(tuning.insert_pct); if choose_insert { let row = planner.make_row(table); planner.insert(conn, table, row.clone()); - planner.push_interaction(super::super::TableWorkloadInteraction::Insert { - conn, - table, - row: row.clone(), - }); - planner.push_interaction(property_interaction(TableProperty::VisibleInConnection { - conn, - table, - row, - })); - if !planner.in_tx(conn) { - let row = planner.last_inserted_row(conn).expect("tracked auto-commit insert"); - planner.push_interaction(property_interaction(TableProperty::VisibleFresh { table, row })); - } + planner.push_interaction(TableWorkloadInteraction::Insert { conn, table, row }); return; } let row = visible_rows[planner.choose_index(visible_rows.len())].clone(); planner.delete(conn, table, row.clone()); - planner.push_interaction(super::super::TableWorkloadInteraction::Delete { - conn, - table, - row: row.clone(), - }); - planner.push_interaction(property_interaction(TableProperty::MissingInConnection { - conn, - table, - row: row.clone(), - })); - if !planner.in_tx(conn) { 
- planner.push_interaction(property_interaction(TableProperty::MissingFresh { table, row })); - } + planner.push_interaction(TableWorkloadInteraction::Delete { conn, table, row }); } fn is_range_compatible(ty: &AlgebraicType) -> bool { matches!(ty, AlgebraicType::U64 | AlgebraicType::Bool) } - -fn maybe_emit_range_probe( - planner: &mut ScenarioPlanner<'_>, - conn: usize, - table: usize, - tuning: ScenarioTuning, -) -> bool { - let table_plan = planner.table_plan(table); - let mut probe_indexes = vec![vec![0]]; - probe_indexes.extend( - table_plan - .extra_indexes - .iter() - .filter(|cols| { - cols.iter() - .all(|&col| is_range_compatible(&table_plan.columns[col as usize].ty)) - }) - .cloned(), - ); - if probe_indexes.is_empty() { - return false; - } - - let use_connection_view = planner.in_tx(conn) && planner.roll_percent(tuning.in_tx_probe_pct); - let basis_rows = if use_connection_view { - planner.visible_rows(conn, table) - } else { - planner.committed_rows(table) - }; - if basis_rows.is_empty() { - return false; - } - - let composite_indexes = probe_indexes - .iter() - .filter(|cols| cols.len() > 1) - .cloned() - .collect::>(); - let cols = if !composite_indexes.is_empty() && planner.roll_percent(tuning.composite_probe_pct) { - composite_indexes[planner.choose_index(composite_indexes.len())].clone() - } else { - probe_indexes[planner.choose_index(probe_indexes.len())].clone() - }; - - let lower = choose_bound(planner, &basis_rows, &cols); - let upper = choose_bound(planner, &basis_rows, &cols); - let (lower, upper) = normalize_bounds(lower, upper); - let mut expected_rows = basis_rows - .into_iter() - .filter(|row| key_in_bounds(&row.project_key(&cols).to_algebraic_value(), &lower, &upper)) - .collect::>(); - expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, &cols)); - - let property = if use_connection_view { - TableProperty::RangeScanInConnection { - conn, - table, - cols, - lower, - upper, - expected_rows, - } - } else { - 
TableProperty::RangeScanFresh { - table, - cols, - lower, - upper, - expected_rows, - } - }; - planner.push_interaction(property_interaction(property)); - true -} - -fn choose_bound(planner: &mut ScenarioPlanner<'_>, rows: &[crate::schema::SimRow], cols: &[u16]) -> PropertyBound { - if planner.roll_percent(20) { - return PropertyBound::Unbounded; - } - let row = &rows[planner.choose_index(rows.len())]; - let key = row.project_key(cols); - if planner.roll_percent(50) { - PropertyBound::Included(key) - } else { - PropertyBound::Excluded(key) - } -} - -fn normalize_bounds(lower: PropertyBound, upper: PropertyBound) -> (PropertyBound, PropertyBound) { - match (bound_key(&lower), bound_key(&upper)) { - (Some(left), Some(right)) if left > right => (upper, lower), - _ => (lower, upper), - } -} - -fn bound_key(bound: &PropertyBound) -> Option { - match bound { - PropertyBound::Unbounded => None, - PropertyBound::Included(key) | PropertyBound::Excluded(key) => Some(key.to_algebraic_value()), - } -} - -fn key_in_bounds(key: &spacetimedb_sats::AlgebraicValue, lower: &PropertyBound, upper: &PropertyBound) -> bool { - let lower_ok = match lower { - PropertyBound::Unbounded => true, - PropertyBound::Included(bound) => key >= &bound.to_algebraic_value(), - PropertyBound::Excluded(bound) => key > &bound.to_algebraic_value(), - }; - let upper_ok = match upper { - PropertyBound::Unbounded => true, - PropertyBound::Included(bound) => key <= &bound.to_algebraic_value(), - PropertyBound::Excluded(bound) => key < &bound.to_algebraic_value(), - }; - lower_ok && upper_ok -} - -fn compare_rows_by_cols(lhs: &crate::schema::SimRow, rhs: &crate::schema::SimRow, cols: &[u16]) -> Ordering { - lhs.project_key(cols) - .to_algebraic_value() - .cmp(&rhs.project_key(cols).to_algebraic_value()) - .then_with(|| lhs.values.cmp(&rhs.values)) -} diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index e9ffb65c438..d4ac0dd726f 100644 --- 
a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -5,7 +5,7 @@ use crate::{ seed::DstRng, }; -use super::{generation::ScenarioPlanner, properties::TableProperty, scenarios::TableScenarioId}; +use super::{generation::ScenarioPlanner, scenarios::TableScenarioId}; /// Scenario hook for shared table-oriented workloads. /// @@ -14,7 +14,6 @@ use super::{generation::ScenarioPlanner, properties::TableProperty, scenarios::T pub(crate) trait TableScenario: Clone { fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan; fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()>; - fn commit_properties(&self) -> Vec; fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize); } @@ -41,7 +40,6 @@ pub enum TableWorkloadInteraction { RollbackTx { conn: usize }, Insert { conn: usize, table: usize, row: SimRow }, Delete { conn: usize, table: usize, row: SimRow }, - Check(TableProperty), } /// Final state gathered from a table-workload engine after execution ends. @@ -61,7 +59,7 @@ pub struct TableWorkloadExecutionFailure { /// Target-provided error message. pub reason: String, /// Interaction that triggered the failure. - pub(crate) interaction: TableWorkloadInteraction, + pub(crate) interaction: Option, } /// Minimal engine interface implemented by concrete table-oriented targets. 
From d8e3fd6e6c0a29364c5082932c530134133ba314 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 23 Apr 2026 16:45:26 +0530 Subject: [PATCH 10/74] traits --- crates/dst/src/main.rs | 128 ++- crates/dst/src/targets/mod.rs | 1 + .../src/targets/relational_db_commitlog.rs | 919 ++++++++++++++++++ .../src/workload/commitlog_ops/generation.rs | 131 +++ crates/dst/src/workload/commitlog_ops/mod.rs | 7 + .../dst/src/workload/commitlog_ops/types.rs | 46 + crates/dst/src/workload/mod.rs | 1 + 7 files changed, 1194 insertions(+), 39 deletions(-) create mode 100644 crates/dst/src/targets/relational_db_commitlog.rs create mode 100644 crates/dst/src/workload/commitlog_ops/generation.rs create mode 100644 crates/dst/src/workload/commitlog_ops/mod.rs create mode 100644 crates/dst/src/workload/commitlog_ops/types.rs diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index 5cfb5696128..59a67394a87 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -7,7 +7,7 @@ use clap::{Args, Parser, Subcommand, ValueEnum}; use spacetimedb_dst::{ config::RunConfig, seed::DstSeed, - targets::{datastore, relational_db}, + targets::{datastore, relational_db, relational_db_commitlog}, workload::table_ops::TableScenarioId, }; @@ -68,6 +68,7 @@ struct ShrinkArgs { enum TargetKind { Datastore, RelationalDb, + RelationalDbCommitlog, } #[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] @@ -88,6 +89,7 @@ impl From for TableScenarioId { } fn main() -> anyhow::Result<()> { + init_tracing(); match Cli::parse().command { Command::Run(args) => run_command(args), Command::Replay(args) => replay_command(args), @@ -95,6 +97,19 @@ fn main() -> anyhow::Result<()> { } } +fn init_tracing() { + use tracing_subscriber::{fmt, EnvFilter}; + + let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); + let _ = fmt() + .with_env_filter(filter) + .with_target(false) + .with_thread_ids(false) + .with_thread_names(false) + .compact() + .try_init(); +} + fn 
run_command(args: RunArgs) -> anyhow::Result<()> { let seed = resolve_seed(args.seed); let config = build_config(args.duration.as_deref(), args.max_interactions)?; @@ -103,6 +118,7 @@ fn run_command(args: RunArgs) -> anyhow::Result<()> { match args.target.target { TargetKind::Datastore => run_datastore(seed, scenario, config, args.save_case), TargetKind::RelationalDb => run_relational(seed, scenario, config, args.save_case), + TargetKind::RelationalDbCommitlog => run_relational_commitlog(seed, scenario, config, args.save_case), } } @@ -110,6 +126,7 @@ fn replay_command(args: ReplayArgs) -> anyhow::Result<()> { match args.target.target { TargetKind::Datastore => replay_datastore(&args.path), TargetKind::RelationalDb => replay_relational(&args.path), + TargetKind::RelationalDbCommitlog => replay_relational_commitlog(&args.path), } } @@ -117,6 +134,7 @@ fn shrink_command(args: ShrinkArgs) -> anyhow::Result<()> { match args.target.target { TargetKind::Datastore => shrink_datastore(&args.path, args.save_shrunk.as_ref()), TargetKind::RelationalDb => shrink_relational(&args.path, args.save_shrunk.as_ref()), + TargetKind::RelationalDbCommitlog => shrink_relational_commitlog(&args.path, args.save_shrunk.as_ref()), } } @@ -148,27 +166,17 @@ fn run_datastore( config: RunConfig, save_case: Option, ) -> anyhow::Result<()> { - if config.max_duration_ms.is_some() { - if save_case.is_some() { - anyhow::bail!("duration-based streamed runs do not support save-case"); - } - let outcome = datastore::run_generated_with_config_and_scenario(seed, scenario, config)?; - println!( - "ok target=datastore seed={} tables={} row_counts={:?}", - seed.0, - outcome.final_rows.len(), - outcome.final_row_counts - ); - return Ok(()); - } - - let max_interactions = config.max_interactions.unwrap_or(1_000); - let case = datastore::materialize_case(seed, scenario, max_interactions); - if let Some(path) = &save_case { - datastore::save_case(path, &case)?; - println!("saved_case={}", path.display()); + if 
save_case.is_some() { + anyhow::bail!("save-case is not supported in streaming run mode"); } - replay_datastore_case(&case) + let outcome = datastore::run_generated_with_config_and_scenario(seed, scenario, config)?; + println!( + "ok target=datastore seed={} tables={} row_counts={:?}", + seed.0, + outcome.final_rows.len(), + outcome.final_row_counts + ); + Ok(()) } fn run_relational( @@ -177,27 +185,34 @@ fn run_relational( config: RunConfig, save_case: Option, ) -> anyhow::Result<()> { - if config.max_duration_ms.is_some() { - if save_case.is_some() { - anyhow::bail!("duration-based streamed runs do not support save-case"); - } - let outcome = relational_db::run_generated_with_config_and_scenario(seed, scenario, config)?; - println!( - "ok target=relational_db seed={} tables={} row_counts={:?}", - seed.0, - outcome.final_rows.len(), - outcome.final_row_counts - ); - return Ok(()); + if save_case.is_some() { + anyhow::bail!("save-case is not supported in streaming run mode"); } + let outcome = relational_db::run_generated_with_config_and_scenario(seed, scenario, config)?; + println!( + "ok target=relational_db seed={} tables={} row_counts={:?}", + seed.0, + outcome.final_rows.len(), + outcome.final_row_counts + ); + Ok(()) +} - let max_interactions = config.max_interactions.unwrap_or(1_000); - let case = relational_db::materialize_case(seed, scenario, max_interactions); - if let Some(path) = &save_case { - relational_db::save_case(path, &case)?; - println!("saved_case={}", path.display()); +fn run_relational_commitlog( + seed: DstSeed, + scenario: TableScenarioId, + config: RunConfig, + save_case: Option, +) -> anyhow::Result<()> { + if save_case.is_some() { + anyhow::bail!("save-case is not supported in streaming run mode"); } - replay_relational_case(&case) + let outcome = relational_db_commitlog::run_generated_with_config_and_scenario(seed, scenario, config)?; + println!( + "ok target=relational_db_commitlog seed={} steps={} durable_commits={} replay_tables={}", 
+ seed.0, outcome.applied_steps, outcome.durable_commit_count, outcome.replay_table_count + ); + Ok(()) } fn replay_datastore(path: &Path) -> anyhow::Result<()> { @@ -210,6 +225,11 @@ fn replay_relational(path: &Path) -> anyhow::Result<()> { replay_relational_case(&case) } +fn replay_relational_commitlog(path: &Path) -> anyhow::Result<()> { + let case = relational_db_commitlog::load_case(path)?; + replay_relational_commitlog_case(&case) +} + fn replay_datastore_case(case: &datastore::DatastoreSimulatorCase) -> anyhow::Result<()> { match datastore::run_case_detailed(case) { Ok(_) => { @@ -250,6 +270,25 @@ fn replay_relational_case(case: &relational_db::RelationalDbSimulatorCase) -> an } } +fn replay_relational_commitlog_case(case: &relational_db_commitlog::RelationalDbCommitlogCase) -> anyhow::Result<()> { + match relational_db_commitlog::run_case_detailed(case) { + Ok(outcome) => { + println!( + "ok target=relational_db_commitlog seed={} steps={} durable_commits={} replay_tables={}", + case.seed.0, outcome.applied_steps, outcome.durable_commit_count, outcome.replay_table_count + ); + Ok(()) + } + Err(failure) => { + println!( + "fail target=relational_db_commitlog seed={} step={} reason={}", + case.seed.0, failure.step_index, failure.reason + ); + anyhow::bail!("relational_db_commitlog case failed") + } + } +} + fn shrink_datastore(path: &Path, save_shrunk: Option<&PathBuf>) -> anyhow::Result<()> { let case = datastore::load_case(path)?; let failure = datastore::run_case_detailed(&case).expect_err("shrink needs failing datastore case"); @@ -270,6 +309,17 @@ fn shrink_relational(path: &Path, save_shrunk: Option<&PathBuf>) -> anyhow::Resu Ok(()) } +fn shrink_relational_commitlog(path: &Path, save_shrunk: Option<&PathBuf>) -> anyhow::Result<()> { + let case = relational_db_commitlog::load_case(path)?; + let failure = relational_db_commitlog::run_case_detailed(&case) + .expect_err("shrink needs failing relational_db_commitlog case"); + let shrunk = 
relational_db_commitlog::shrink_failure(&case, &failure)?; + let out = shrunk_path(path, save_shrunk); + relational_db_commitlog::save_case(&out, &shrunk)?; + println!("shrunk_case={}", out.display()); + Ok(()) +} + fn shrunk_path(default_input: &Path, explicit: Option<&PathBuf>) -> PathBuf { explicit.cloned().unwrap_or_else(|| { let mut path = default_input.as_os_str().to_os_string(); diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index 2ae5a0e221a..52967c78b01 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -4,3 +4,4 @@ pub mod datastore; pub(crate) mod harness; pub(crate) mod properties; pub mod relational_db; +pub mod relational_db_commitlog; diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs new file mode 100644 index 00000000000..29a93dc7f5d --- /dev/null +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -0,0 +1,919 @@ +//! RelationalDB DST target with mocked commitlog file chaos and replay checks. 
+ +use std::{ + collections::{BTreeMap, HashMap}, + path::Path, + time::Instant, +}; + +use spacetimedb_commitlog::{self as commitlog, error::Traversal}; +use spacetimedb_core::{ + db::relational_db::{MutTx as RelMutTx, RelationalDB, Txdata}, + messages::control_db::HostType, +}; +use spacetimedb_datastore::{ + execution_context::Workload, + traits::{IsolationLevel, Program, TxData as DatastoreTxData}, +}; +use spacetimedb_durability::{EmptyHistory, History, TxOffset}; +use spacetimedb_lib::{ + db::auth::{StAccess, StTableType}, + Identity, +}; +use spacetimedb_primitives::TableId; +use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; +use spacetimedb_schema::{ + def::BTreeAlgorithm, + schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, + table_name::TableName, +}; +use spacetimedb_table::page_pool::PagePool; +use tracing::{debug, info, trace, warn}; + +use crate::{ + bugbase::{load_json, save_json}, + config::RunConfig, + schema::{SchemaPlan, SimRow}, + seed::{DstRng, DstSeed}, + shrink::shrink_by_removing, + workload::{ + commitlog_ops::{ + materialize_case, CommitlogInteraction, CommitlogWorkloadCase, CommitlogWorkloadFailure, + CommitlogWorkloadOutcome, + }, + table_ops::{ConnectionWriteState, TableScenario, TableScenarioId, TableWorkloadInteraction}, + }, +}; + +pub type RelationalDbCommitlogCase = CommitlogWorkloadCase; +pub type RelationalDbCommitlogFailure = CommitlogWorkloadFailure; +pub type RelationalDbCommitlogOutcome = CommitlogWorkloadOutcome; + +pub fn materialize_case_for_target( + seed: DstSeed, + scenario: TableScenarioId, + max_interactions: usize, +) -> RelationalDbCommitlogCase { + materialize_case(seed, scenario, max_interactions) +} + +pub fn save_case(path: impl AsRef, case: &RelationalDbCommitlogCase) -> anyhow::Result<()> { + save_json(path, case) +} + +pub fn load_case(path: impl AsRef) -> anyhow::Result { + load_json(path) +} + +pub fn run_case_detailed( + case: &RelationalDbCommitlogCase, +) -> Result { + info!( + 
"relational_db_commitlog start seed={} scenario={:?} interactions={} connections={}", + case.seed.0, + case.scenario, + case.interactions.len(), + case.num_connections + ); + let mut engine = RelationalDbCommitlogEngine::new(case.seed, &case.schema, case.num_connections) + .map_err(|err| failure_without_step(format!("bootstrap failed: {err}")))?; + + for (step_index, interaction) in case.interactions.iter().enumerate() { + trace!(step_index, ?interaction, "interaction"); + if let Err(reason) = engine.execute(interaction) { + engine.finish(); + warn!(step_index, %reason, "interaction failed"); + return Err(RelationalDbCommitlogFailure { + step_index, + reason, + interaction: Some(interaction.clone()), + }); + } + } + + let outcome = engine + .collect_outcome() + .map_err(|err| failure_without_step(err.to_string()))?; + engine.finish(); + info!( + applied_steps = outcome.applied_steps, + durable_commit_count = outcome.durable_commit_count, + replay_table_count = outcome.replay_table_count, + "relational_db_commitlog complete" + ); + Ok(outcome) +} + +pub fn run_generated_with_config_and_scenario( + seed: DstSeed, + scenario: TableScenarioId, + config: RunConfig, +) -> anyhow::Result { + let mut connection_rng = seed.fork(121).rng(); + let num_connections = connection_rng.index(3) + 1; + let mut schema_rng = seed.fork(122).rng(); + let schema = scenario.generate_schema(&mut schema_rng); + let mut stream = crate::workload::commitlog_ops::InteractionStream::new( + seed, + scenario, + schema.clone(), + num_connections, + config.max_interactions_or_default(usize::MAX), + ); + let mut engine = RelationalDbCommitlogEngine::new(seed, &schema, num_connections)?; + let deadline = config.deadline(); + let mut step_index = 0usize; + + loop { + if deadline.is_some_and(|deadline| Instant::now() >= deadline) { + stream.request_finish(); + } + let Some(interaction) = stream.next() else { + break; + }; + trace!(step_index, ?interaction, "streaming interaction"); + engine + 
.execute(&interaction) + .map_err(|reason| anyhow::anyhow!("workload failed at step {step_index}: {reason}"))?; + step_index = step_index.saturating_add(1); + } + + let outcome = engine.collect_outcome().map_err(anyhow::Error::msg)?; + engine.finish(); + Ok(outcome) +} + +pub fn shrink_failure( + case: &RelationalDbCommitlogCase, + failure: &RelationalDbCommitlogFailure, +) -> anyhow::Result { + shrink_by_removing( + case, + failure, + |case| { + let mut shrunk = case.clone(); + shrunk.interactions.truncate(failure.step_index.saturating_add(1)); + shrunk + }, + |case| case.interactions.len(), + |case, idx| { + let interaction = case.interactions.get(idx)?; + if !can_remove_interaction(interaction) { + return None; + } + let mut interactions = case.interactions.clone(); + interactions.remove(idx); + Some(RelationalDbCommitlogCase { + seed: case.seed, + scenario: case.scenario, + num_connections: case.num_connections, + schema: case.schema.clone(), + interactions, + }) + }, + |case| match run_case_detailed(case) { + Ok(_) => anyhow::bail!("case did not fail"), + Err(failure) => Ok(failure), + }, + |expected, candidate| expected.reason == candidate.reason, + ) +} + +fn can_remove_interaction(interaction: &CommitlogInteraction) -> bool { + match interaction { + CommitlogInteraction::Table(TableWorkloadInteraction::CommitTx { .. }) + | CommitlogInteraction::Table(TableWorkloadInteraction::RollbackTx { .. }) => false, + _ => true, + } +} + +#[derive(Clone, Debug)] +struct DynamicTableState { + version: u32, + table_id: TableId, +} + +/// Engine executing mixed table+lifecycle interactions while recording mocked durable history. 
+struct RelationalDbCommitlogEngine { + db: RelationalDB, + execution: ConnectionWriteState, + base_schema: SchemaPlan, + base_table_ids: Vec, + dynamic_tables: HashMap, + step: usize, + commitlog: MockCommitlogFs, + last_durable_snapshot: DurableSnapshot, + pending_snapshot_capture: bool, +} + +type DurableSnapshot = BTreeMap>; + +impl RelationalDbCommitlogEngine { + fn new(seed: DstSeed, schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { + let db = bootstrap_relational_db()?; + let mut this = Self { + db, + execution: ConnectionWriteState::new(num_connections), + base_schema: schema.clone(), + base_table_ids: Vec::with_capacity(schema.tables.len()), + dynamic_tables: HashMap::new(), + step: 0, + commitlog: MockCommitlogFs::new(seed.fork(700)), + last_durable_snapshot: BTreeMap::new(), + pending_snapshot_capture: false, + }; + this.initialize_program().map_err(anyhow::Error::msg)?; + this.install_base_schema().map_err(anyhow::Error::msg)?; + Ok(this) + } + + fn initialize_program(&mut self) -> Result<(), String> { + let mut tx = self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::Internal); + self.db + .set_initialized(&mut tx, Program::empty(HostType::Wasm.into())) + .map_err(|err| format!("set_initialized failed: {err}"))?; + self.commit_tx_capture(tx, "initialize") + } + + fn install_base_schema(&mut self) -> Result<(), String> { + let mut tx = self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + for table in &self.base_schema.tables { + let columns = table + .columns + .iter() + .enumerate() + .map(|(idx, col)| ColumnSchema::for_test(idx as u16, &col.name, col.ty.clone())) + .collect::>(); + let mut indexes = vec![IndexSchema::for_test( + format!("{}_id_idx", table.name), + BTreeAlgorithm::from(0), + )]; + for cols in &table.extra_indexes { + let cols_name = cols.iter().map(|col| format!("c{col}")).collect::>().join("_"); + indexes.push(IndexSchema::for_test( + format!("{}_{}_idx", table.name, cols_name), + 
BTreeAlgorithm::from(cols.iter().copied().collect::()), + )); + } + let constraints = vec![ConstraintSchema::unique_for_test( + format!("{}_id_unique", table.name), + 0, + )]; + let table_id = self + .db + .create_table( + &mut tx, + TableSchema::new( + TableId::SENTINEL, + TableName::for_test(&table.name), + None, + columns, + indexes, + constraints, + vec![], + StTableType::User, + StAccess::Public, + None, + Some(0.into()), + false, + None, + ), + ) + .map_err(|err| format!("create table '{}' failed: {err}", table.name))?; + self.base_table_ids.push(table_id); + } + self.commit_tx_capture(tx, "install base schema") + } + + fn execute(&mut self, interaction: &CommitlogInteraction) -> Result<(), String> { + self.step = self.step.saturating_add(1); + match interaction { + CommitlogInteraction::Table(op) => self.execute_table_op(op), + CommitlogInteraction::CreateDynamicTable { conn, slot } => self.create_dynamic_table(*conn, *slot), + CommitlogInteraction::DropDynamicTable { conn, slot } => self.drop_dynamic_table(*conn, *slot), + CommitlogInteraction::MigrateDynamicTable { conn, slot } => self.migrate_dynamic_table(*conn, *slot), + CommitlogInteraction::ChaosSync => self.sync_and_snapshot(true), + } + } + + fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), String> { + trace!(step = self.step, ?interaction, "table interaction"); + match interaction { + TableWorkloadInteraction::BeginTx { conn } => { + self.execution.ensure_known_connection(*conn)?; + if self.execution.tx_by_connection[*conn].is_some() { + return Err(format!("connection {conn} already has open transaction")); + } + if let Some(owner) = self.execution.active_writer { + return Err(format!( + "connection {conn} cannot begin write transaction while connection {owner} owns lock" + )); + } + self.execution.tx_by_connection[*conn] = + Some(self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests)); + self.execution.active_writer = Some(*conn); + Ok(()) + } + 
TableWorkloadInteraction::CommitTx { conn } => { + self.execution.ensure_writer_owner(*conn, "commit")?; + let tx = self.execution.tx_by_connection[*conn] + .take() + .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; + self.commit_tx_capture(tx, "commit interaction")?; + self.execution.active_writer = None; + self.capture_pending_snapshot_if_idle()?; + Ok(()) + } + TableWorkloadInteraction::RollbackTx { conn } => { + self.execution.ensure_writer_owner(*conn, "rollback")?; + let tx = self.execution.tx_by_connection[*conn] + .take() + .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; + let _ = self.db.rollback_mut_tx(tx); + self.execution.active_writer = None; + self.capture_pending_snapshot_if_idle()?; + Ok(()) + } + TableWorkloadInteraction::Insert { conn, table, row } => { + let in_tx = self.execution.tx_by_connection[*conn].is_some(); + self.with_mut_tx(*conn, |engine, tx| { + let table_id = *engine + .base_table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + engine + .db + .insert(tx, table_id, &bsatn) + .map_err(|err| format!("insert failed: {err}"))?; + Ok(()) + })?; + if !in_tx { + self.sync_and_snapshot(false)?; + } + self.check_insert_select(*conn, *table, row) + } + TableWorkloadInteraction::Delete { conn, table, row } => { + let in_tx = self.execution.tx_by_connection[*conn].is_some(); + self.with_mut_tx(*conn, |engine, tx| { + let table_id = *engine + .base_table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let deleted = engine.db.delete_by_rel(tx, table_id, [row.to_product_value()]); + if deleted != 1 { + return Err(format!("delete expected 1 row, got {deleted}")); + } + Ok(()) + })?; + if !in_tx { + self.sync_and_snapshot(false)?; + } + self.check_delete_select(*conn, *table, row) + } + } + } + + fn with_mut_tx( + &mut self, + conn: usize, + mut f: impl FnMut(&mut Self, 
&mut RelMutTx) -> Result<(), String>, + ) -> Result<(), String> { + self.execution.ensure_known_connection(conn)?; + if self.execution.tx_by_connection[conn].is_some() { + let mut tx = self.execution.tx_by_connection[conn] + .take() + .ok_or_else(|| format!("connection {conn} missing transaction handle"))?; + f(self, &mut tx)?; + self.execution.tx_by_connection[conn] = Some(tx); + return Ok(()); + } + + if let Some(owner) = self.execution.active_writer { + return Err(format!( + "connection {conn} cannot auto-commit write while connection {owner} owns lock" + )); + } + + let mut tx = self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + self.execution.active_writer = Some(conn); + f(self, &mut tx)?; + self.commit_tx_capture(tx, "auto-commit write")?; + self.execution.active_writer = None; + self.capture_pending_snapshot_if_idle()?; + Ok(()) + } + + fn create_dynamic_table(&mut self, conn: usize, slot: u32) -> Result<(), String> { + let conn = self.normalize_conn(conn); + debug!(step = self.step, conn, slot, "create dynamic table"); + self.with_mut_tx(conn, |engine, tx| { + if engine.dynamic_tables.contains_key(&slot) { + return Ok(()); + } + let name = dynamic_table_name(slot, 0); + let schema = dynamic_schema(&name, 0); + let table_id = engine + .db + .create_table(tx, schema) + .map_err(|err| format!("create dynamic table slot={slot} failed: {err}"))?; + engine + .dynamic_tables + .insert(slot, DynamicTableState { version: 0, table_id }); + Ok(()) + })?; + self.sync_and_snapshot(false) + } + + fn drop_dynamic_table(&mut self, conn: usize, slot: u32) -> Result<(), String> { + let conn = self.normalize_conn(conn); + debug!(step = self.step, conn, slot, "drop dynamic table"); + self.with_mut_tx(conn, |engine, tx| { + let Some(state) = engine.dynamic_tables.remove(&slot) else { + return Ok(()); + }; + if let Err(err) = engine.db.drop_table(tx, state.table_id) { + let msg = err.to_string(); + if !msg.contains("not found") { + return 
Err(format!("drop dynamic table slot={slot} failed: {err}")); + } + } + Ok(()) + })?; + self.sync_and_snapshot(false) + } + + fn migrate_dynamic_table(&mut self, conn: usize, slot: u32) -> Result<(), String> { + let conn = self.normalize_conn(conn); + debug!(step = self.step, conn, slot, "migrate dynamic table"); + self.with_mut_tx(conn, |engine, tx| { + let Some(state) = engine.dynamic_tables.get(&slot).cloned() else { + return Ok(()); + }; + let to_version = state.version.saturating_add(1); + let to_name = dynamic_table_name(slot, to_version); + let to_schema = dynamic_schema(&to_name, to_version); + let new_table_id = engine + .db + .create_table(tx, to_schema) + .map_err(|err| format!("migrate create new table slot={slot} failed: {err}"))?; + let existing_rows = engine + .db + .iter_mut(tx, state.table_id) + .map_err(|err| format!("migrate scan old table failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + for row in &existing_rows { + let mut migrated = row.clone(); + if to_version > 0 && migrated.values.len() < 3 { + migrated.values.push(AlgebraicValue::Bool(false)); + } + let bsatn = migrated.to_bsatn().map_err(|err| err.to_string())?; + engine + .db + .insert(tx, new_table_id, &bsatn) + .map_err(|err| format!("migrate copy row failed: {err}"))?; + } + if let Err(err) = engine.db.drop_table(tx, state.table_id) { + let msg = err.to_string(); + if !msg.contains("not found") { + return Err(format!("migrate drop old table slot={slot} failed: {err}")); + } + } + engine.dynamic_tables.insert( + slot, + DynamicTableState { + version: to_version, + table_id: new_table_id, + }, + ); + Ok(()) + })?; + self.sync_and_snapshot(false) + } + + fn normalize_conn(&self, conn: usize) -> usize { + self.execution.active_writer.unwrap_or(conn) + } + + fn commit_tx_capture(&mut self, tx: RelMutTx, context: &str) -> Result<(), String> { + let committed = self + .db + .commit_tx(tx) + .map_err(|err| format!("{context} commit 
failed: {err}"))?; + if let Some((offset, tx_data, _, _)) = committed { + let Some(encoded) = encode_txdata_for_commitlog(&tx_data) else { + trace!(step = self.step, context, "commit had no durable payload"); + return Ok(()); + }; + trace!(step = self.step, context, offset, "append tx to mock commitlog"); + self.commitlog + .append(offset, encoded) + .map_err(|err| format!("{context} append to mock commitlog failed: {err}"))?; + } + Ok(()) + } + + fn sync_and_snapshot(&mut self, forced: bool) -> Result<(), String> { + let advanced = self + .commitlog + .sync(forced) + .map_err(|err| format!("mock sync failed: {err}"))?; + trace!( + step = self.step, + forced, + advanced, + durable_count = self.commitlog.durable_count(), + "mock sync" + ); + if advanced { + if self.execution.active_writer.is_some() { + self.pending_snapshot_capture = true; + trace!("defer durable snapshot capture until writer releases"); + } else { + self.last_durable_snapshot = self.snapshot_tracked_tables()?; + self.pending_snapshot_capture = false; + debug!( + tables = self.last_durable_snapshot.len(), + "captured durable snapshot after sync" + ); + } + } + Ok(()) + } + + fn capture_pending_snapshot_if_idle(&mut self) -> Result<(), String> { + if self.pending_snapshot_capture && self.execution.active_writer.is_none() { + self.last_durable_snapshot = self.snapshot_tracked_tables()?; + self.pending_snapshot_capture = false; + } + Ok(()) + } + + fn table_id_for_index(&self, table: usize) -> Result { + self.base_table_ids + .get(table) + .copied() + .ok_or_else(|| format!("table {table} out of range")) + } + + fn lookup_base_row(&self, conn: usize, table: usize, id: u64) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { + Ok(self + .db + .iter_by_col_eq_mut(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("in-tx lookup failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next()) + } else { + let tx = self.db.begin_tx(Workload::ForTests); + let found = self + .db + .iter_by_col_eq(&tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next(); + let _ = self.db.release_tx(tx); + Ok(found) + } + } + + fn check_insert_select(&self, conn: usize, table: usize, row: &SimRow) -> Result<(), String> { + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = self.lookup_base_row(conn, table, id)?; + if found != Some(row.clone()) { + return Err(format!( + "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={row:?}, actual={found:?}" + )); + } + Ok(()) + } + + fn check_delete_select(&self, conn: usize, table: usize, row: &SimRow) -> Result<(), String> { + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + if self.lookup_base_row(conn, table, id)?.is_some() { + return Err(format!( + "[DeleteSelect] row still visible after delete on conn={conn}, table={table}, row={row:?}" + )); + } + Ok(()) + } + + fn collect_rows_by_id(&self, table_id: TableId) -> Result, String> { + let tx = self.db.begin_tx(Workload::ForTests); + let mut rows = self + .db + .iter(&tx, table_id) + .map_err(|err| format!("scan failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + let _ = self.db.release_tx(tx); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + } + + fn snapshot_tracked_tables(&self) -> Result { + let mut snap = BTreeMap::new(); + for (idx, table_id) in self.base_table_ids.iter().enumerate() { + let name = self + .base_schema + .tables + .get(idx) + .map(|t| t.name.clone()) + .ok_or_else(|| format!("base table index {idx} missing schema"))?; + snap.insert(name, self.collect_rows_by_id(*table_id)?); + } + for (slot, state) in &self.dynamic_tables { + let name = dynamic_table_name(*slot, state.version); + snap.insert(name, self.collect_rows_by_id(state.table_id)?); + } + Ok(snap) + } + + fn collect_outcome(&mut self) -> Result { + self.capture_pending_snapshot_if_idle()?; + self.sync_and_snapshot(true)?; + let history = MockHistory::from_durable(self.commitlog.durable_records())?; + let replayed = reopen_from_history(history)?; + debug!( + durable_commits = self.commitlog.durable_count(), + replay_tables = replayed.len(), + "replayed durable prefix" + ); + Ok(RelationalDbCommitlogOutcome { + applied_steps: self.step, + durable_commit_count: self.commitlog.durable_count(), + replay_table_count: replayed.len(), + }) + } + + fn finish(&mut self) { + for tx in &mut self.execution.tx_by_connection { + if let Some(tx) = tx.take() { + let _ = self.db.rollback_mut_tx(tx); + } + } + self.execution.active_writer = None; + } +} + +fn reopen_from_history(history: MockHistory) -> Result { + debug!("reopen relational db from mocked durable history"); + let (db, connected_clients) = RelationalDB::open( + Identity::ZERO, + Identity::ZERO, + history, + None, + None, + PagePool::new_for_test(), + ) + .map_err(|err| format!("reopen from history failed: {err}"))?; + if !connected_clients.is_empty() { + return Err(format!( + "unexpected connected clients after replay: {connected_clients:?}" + )); + } + + let tx = 
db.begin_tx(Workload::ForTests); + let schemas = db + .get_all_tables(&tx) + .map_err(|err| format!("list tables after replay failed: {err}"))?; + let mut snapshot = BTreeMap::>::new(); + for schema in schemas { + let name = schema.table_name.to_string(); + if !is_user_dst_table(&name) { + continue; + } + let mut rows = db + .iter(&tx, schema.table_id) + .map_err(|err| format!("scan replay table '{name}' failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + snapshot.insert(name, rows); + } + let _ = db.release_tx(tx); + debug!(tables = snapshot.len(), "reopen snapshot collected"); + Ok(snapshot) +} + +fn is_user_dst_table(name: &str) -> bool { + !name.starts_with("st_") +} + +fn bootstrap_relational_db() -> anyhow::Result { + let (db, connected_clients) = RelationalDB::open( + Identity::ZERO, + Identity::ZERO, + EmptyHistory::new(), + None, + None, + PagePool::new_for_test(), + )?; + assert_eq!(connected_clients.len(), 0); + Ok(db) +} + +fn dynamic_table_name(slot: u32, version: u32) -> String { + format!("dst_dynamic_slot_{slot}_v{version}") +} + +fn dynamic_schema(name: &str, version: u32) -> TableSchema { + let mut columns = vec![ + ColumnSchema::for_test(0, "id", AlgebraicType::U64), + ColumnSchema::for_test(1, "value", AlgebraicType::U64), + ]; + if version > 0 { + columns.push(ColumnSchema::for_test(2, "migrated", AlgebraicType::Bool)); + } + let indexes = vec![IndexSchema::for_test(format!("{name}_id_idx"), BTreeAlgorithm::from(0))]; + let constraints = vec![ConstraintSchema::unique_for_test(format!("{name}_id_unique"), 0)]; + TableSchema::new( + TableId::SENTINEL, + TableName::for_test(name), + None, + columns, + indexes, + constraints, + vec![], + StTableType::User, + StAccess::Public, + None, + Some(0.into()), + false, + None, + ) +} + +fn failure_without_step(reason: String) -> RelationalDbCommitlogFailure { + RelationalDbCommitlogFailure { + 
step_index: usize::MAX, + reason, + interaction: None, + } +} + +fn encode_txdata_for_commitlog(tx_data: &DatastoreTxData) -> Option { + let _tx_offset = tx_data.tx_offset()?; + let mut inserts: Box<_> = tx_data + .persistent_inserts() + .map(|(table_id, rowdata)| commitlog::payload::txdata::Ops { table_id, rowdata }) + .collect(); + inserts.sort_unstable_by_key(|ops| ops.table_id); + + let mut deletes: Box<_> = tx_data + .persistent_deletes() + .map(|(table_id, rowdata)| commitlog::payload::txdata::Ops { table_id, rowdata }) + .collect(); + deletes.sort_unstable_by_key(|ops| ops.table_id); + + let mut truncates: Box<[_]> = tx_data.persistent_truncates().collect(); + truncates.sort_unstable_by_key(|table_id| *table_id); + + Some(Txdata { + inputs: None, + outputs: None, + mutations: Some(commitlog::payload::txdata::Mutations { + inserts, + deletes, + truncates, + }), + }) +} + +/// Deterministic mocked file/commitlog layer with chaos. +struct MockCommitlogFs { + chaos_rng: DstRng, + pending: Vec<(u64, Txdata)>, + durable: Vec<(u64, Txdata)>, + commits_since_sync: usize, +} + +impl MockCommitlogFs { + fn new(seed: DstSeed) -> Self { + Self { + chaos_rng: seed.rng(), + pending: Vec::new(), + durable: Vec::new(), + commits_since_sync: 0, + } + } + + fn append(&mut self, tx_offset: u64, txdata: Txdata) -> Result<(), String> { + // deterministic append chaos: low-rate injected write failure + if self.chaos_rng.index(1000) < 6 { + warn!(tx_offset, "mock commitlog injected append error"); + return Err("injected append error".to_string()); + } + if let Some((last_offset, _)) = self.pending.last().or_else(|| self.durable.last()) + && tx_offset != last_offset.saturating_add(1) + { + return Err(format!( + "non-contiguous commitlog append: got={tx_offset} expected={}", + last_offset.saturating_add(1) + )); + } + self.pending.push((tx_offset, txdata)); + self.commits_since_sync = self.commits_since_sync.saturating_add(1); + trace!( + tx_offset, + pending = self.pending.len(), + 
durable = self.durable.len(), + commits_since_sync = self.commits_since_sync, + "mock commitlog append" + ); + Ok(()) + } + + fn sync(&mut self, forced: bool) -> Result { + if self.pending.is_empty() { + return Ok(false); + } + + // periodic delayed fsync behavior + let should_attempt = forced || self.commits_since_sync >= 3 || self.chaos_rng.index(100) < 30; + if !should_attempt { + trace!( + forced, + pending = self.pending.len(), + commits_since_sync = self.commits_since_sync, + "mock sync skipped (delay)" + ); + return Ok(false); + } + + // injected fsync miss: pretend sync happened but keep data pending + if !forced && self.chaos_rng.index(100) < 12 { + self.commits_since_sync = 0; + warn!( + pending = self.pending.len(), + "mock sync injected miss (no durable advance)" + ); + return Ok(false); + } + + let mut advanced = false; + for pending in self.pending.drain(..) { + self.durable.push(pending); + advanced = true; + } + self.commits_since_sync = 0; + debug!(durable = self.durable.len(), "mock sync advanced durable prefix"); + Ok(advanced) + } + + fn durable_records(&self) -> &[(u64, Txdata)] { + &self.durable + } + + fn durable_count(&self) -> usize { + self.durable.len() + } +} + +/// In-memory history used to replay exactly the durable commitlog prefix. 
+struct MockHistory(commitlog::commitlog::Generic); + +impl MockHistory { + fn from_durable(records: &[(u64, Txdata)]) -> Result { + let mut log = commitlog::commitlog::Generic::open(commitlog::repo::Memory::unlimited(), Default::default()) + .map_err(|err| format!("open in-memory commitlog failed: {err}"))?; + for (offset, txdata) in records { + log.commit([(*offset, txdata.clone())]) + .map_err(|err| format!("append durable tx offset={offset} failed: {err}"))?; + } + Ok(Self(log)) + } +} + +impl History for MockHistory { + type TxData = Txdata; + + fn fold_transactions_from(&self, offset: TxOffset, decoder: D) -> Result<(), D::Error> + where + D: commitlog::Decoder, + D::Error: From, + { + self.0.fold_transactions_from(offset, decoder) + } + + fn transactions_from<'a, D>( + &self, + offset: TxOffset, + decoder: &'a D, + ) -> impl Iterator, D::Error>> + where + D: commitlog::Decoder, + D::Error: From, + Self::TxData: 'a, + { + self.0.transactions_from(offset, decoder) + } + + fn tx_range_hint(&self) -> (TxOffset, Option) { + let min = self.0.min_committed_offset().unwrap_or_default(); + let max = self.0.max_committed_offset(); + (min, max) + } +} diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs new file mode 100644 index 00000000000..d703f5b3288 --- /dev/null +++ b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -0,0 +1,131 @@ +//! Composite generator: reuse `table_ops` and interleave lifecycle + chaos ops. + +use std::collections::{BTreeSet, VecDeque}; + +use crate::{ + schema::SchemaPlan, + seed::{DstRng, DstSeed}, + workload::{ + commitlog_ops::{CommitlogInteraction, CommitlogWorkloadCase}, + table_ops::{self, TableScenario, TableScenarioId}, + }, +}; + +/// Streaming composite interaction source for commitlog-oriented targets. 
+pub(crate) struct InteractionStream { + base: table_ops::InteractionStream, + rng: DstRng, + num_connections: usize, + next_slot: u32, + alive_slots: BTreeSet, + pending: VecDeque, +} + +impl InteractionStream { + pub fn new( + seed: DstSeed, + scenario: S, + schema: SchemaPlan, + num_connections: usize, + target_interactions: usize, + ) -> Self { + Self { + base: table_ops::InteractionStream::new(seed.fork(123), scenario, schema, num_connections, target_interactions), + rng: seed.fork(124).rng(), + num_connections, + next_slot: 0, + alive_slots: BTreeSet::new(), + pending: VecDeque::new(), + } + } + + pub fn request_finish(&mut self) { + self.base.request_finish(); + } + + fn fill_pending(&mut self) -> bool { + let Some(base_op) = self.base.next() else { + return false; + }; + self.pending.push_back(CommitlogInteraction::Table(base_op)); + + if self.rng.index(100) < 18 { + self.pending.push_back(CommitlogInteraction::ChaosSync); + } + + if self.rng.index(100) < 9 { + let conn = self.rng.index(self.num_connections); + let slot = self.next_slot; + self.next_slot = self.next_slot.saturating_add(1); + self.alive_slots.insert(slot); + self.pending.push_back(CommitlogInteraction::CreateDynamicTable { conn, slot }); + return true; + } + + if !self.alive_slots.is_empty() && self.rng.index(100) < 6 { + let conn = self.rng.index(self.num_connections); + let idx = self.rng.index(self.alive_slots.len()); + let slot = *self + .alive_slots + .iter() + .nth(idx) + .expect("slot index within alive set bounds"); + self.pending.push_back(CommitlogInteraction::MigrateDynamicTable { conn, slot }); + } + + if !self.alive_slots.is_empty() && self.rng.index(100) < 5 { + let conn = self.rng.index(self.num_connections); + let idx = self.rng.index(self.alive_slots.len()); + let slot = *self + .alive_slots + .iter() + .nth(idx) + .expect("slot index within alive set bounds"); + self.alive_slots.remove(&slot); + self.pending.push_back(CommitlogInteraction::DropDynamicTable { conn, slot }); 
+ } + + true + } +} + +impl Iterator for InteractionStream { + type Item = CommitlogInteraction; + + fn next(&mut self) -> Option { + loop { + if let Some(next) = self.pending.pop_front() { + return Some(next); + } + if !self.fill_pending() { + return None; + } + } + } +} + +pub(crate) fn materialize_case( + seed: DstSeed, + scenario: TableScenarioId, + max_interactions: usize, +) -> CommitlogWorkloadCase { + let mut connection_rng = seed.fork(121).rng(); + let num_connections = connection_rng.index(3) + 1; + let mut schema_rng = seed.fork(122).rng(); + let schema = scenario.generate_schema(&mut schema_rng); + let interactions = InteractionStream::new(seed, scenario, schema.clone(), num_connections, max_interactions) + .collect::>(); + + CommitlogWorkloadCase { + seed, + scenario, + num_connections, + schema, + interactions, + } +} + +#[allow(dead_code)] +pub(crate) fn base_schema(case: &CommitlogWorkloadCase) -> &SchemaPlan { + &case.schema +} diff --git a/crates/dst/src/workload/commitlog_ops/mod.rs b/crates/dst/src/workload/commitlog_ops/mod.rs new file mode 100644 index 00000000000..c6960b5c7f4 --- /dev/null +++ b/crates/dst/src/workload/commitlog_ops/mod.rs @@ -0,0 +1,7 @@ +//! Commitlog-oriented workload that composes `table_ops` with lifecycle/chaos. + +mod generation; +mod types; + +pub(crate) use generation::{materialize_case, InteractionStream}; +pub use types::{CommitlogInteraction, CommitlogWorkloadCase, CommitlogWorkloadFailure, CommitlogWorkloadOutcome}; diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs new file mode 100644 index 00000000000..d4c45ba66e8 --- /dev/null +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -0,0 +1,46 @@ +//! Serializable interaction model for relational-db + commitlog DST. 
+ +use serde::{Deserialize, Serialize}; + +use crate::{schema::SchemaPlan, seed::DstSeed, workload::table_ops::TableWorkloadInteraction}; + +/// One interaction in the commitlog-oriented mixed workload. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum CommitlogInteraction { + /// Reused base workload interaction from `table_ops`. + Table(TableWorkloadInteraction), + /// Create a dynamic user table for a logical slot. + CreateDynamicTable { conn: usize, slot: u32 }, + /// Drop a previously created dynamic user table. + DropDynamicTable { conn: usize, slot: u32 }, + /// Migrate dynamic table schema for a slot. + MigrateDynamicTable { conn: usize, slot: u32 }, + /// Ask the mock commitlog file layer to run a sync attempt. + ChaosSync, +} + +/// Materialized case for deterministic replay and shrinking. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct CommitlogWorkloadCase { + pub seed: DstSeed, + pub scenario: crate::workload::table_ops::TableScenarioId, + pub num_connections: usize, + pub schema: SchemaPlan, + pub interactions: Vec, +} + +/// Successful run summary for commitlog target. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct CommitlogWorkloadOutcome { + pub applied_steps: usize, + pub durable_commit_count: usize, + pub replay_table_count: usize, +} + +/// Failure info for commitlog target execution. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct CommitlogWorkloadFailure { + pub step_index: usize, + pub reason: String, + pub interaction: Option, +} diff --git a/crates/dst/src/workload/mod.rs b/crates/dst/src/workload/mod.rs index 3de504a5324..e9c13ce836c 100644 --- a/crates/dst/src/workload/mod.rs +++ b/crates/dst/src/workload/mod.rs @@ -1,3 +1,4 @@ //! Shared workload generators reused by multiple DST targets. 
+pub mod commitlog_ops; pub mod table_ops; From 1372b3ac07e02c4fa66ad79b18f29cdda2f6e5d6 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 23 Apr 2026 16:45:37 +0530 Subject: [PATCH 11/74] delete code --- crates/dst/Cargo.toml | 3 + crates/dst/src/bugbase.rs | 18 -- crates/dst/src/core/mod.rs | 77 +++++++ crates/dst/src/lib.rs | 7 +- crates/dst/src/main.rs | 217 +----------------- crates/dst/src/shrink.rs | 36 --- crates/dst/src/targets/datastore.rs | 36 +-- crates/dst/src/targets/descriptor.rs | 70 ++++++ crates/dst/src/targets/harness.rs | 93 +------- crates/dst/src/targets/mod.rs | 1 + crates/dst/src/targets/relational_db.rs | 40 +--- .../src/targets/relational_db_commitlog.rs | 130 +---------- .../src/workload/commitlog_ops/generation.rs | 64 +++--- crates/dst/src/workload/commitlog_ops/mod.rs | 4 +- .../dst/src/workload/commitlog_ops/types.rs | 20 +- .../dst/src/workload/table_ops/generation.rs | 31 ++- crates/dst/src/workload/table_ops/mod.rs | 6 +- crates/dst/src/workload/table_ops/runner.rs | 128 ++++------- crates/dst/src/workload/table_ops/types.rs | 49 ++-- 19 files changed, 306 insertions(+), 724 deletions(-) delete mode 100644 crates/dst/src/bugbase.rs create mode 100644 crates/dst/src/core/mod.rs delete mode 100644 crates/dst/src/shrink.rs create mode 100644 crates/dst/src/targets/descriptor.rs diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index acad3047ade..d3672c78f4b 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -21,6 +21,7 @@ serde.workspace = true serde_json.workspace = true spacetimedb-datastore = { workspace = true, features = ["test"] } spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.1.0" } +spacetimedb-commitlog.workspace = true spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.1.0" } spacetimedb-execution.workspace = true spacetimedb-lib.workspace = true @@ -28,3 +29,5 @@ spacetimedb-primitives.workspace = true 
spacetimedb-sats.workspace = true spacetimedb-schema = { workspace = true, features = ["test"] } spacetimedb-table.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true diff --git a/crates/dst/src/bugbase.rs b/crates/dst/src/bugbase.rs deleted file mode 100644 index 1fa4ac23a54..00000000000 --- a/crates/dst/src/bugbase.rs +++ /dev/null @@ -1,18 +0,0 @@ -//! Generic persistence helpers for failure artifacts. - -use std::{fs, path::Path}; - -use serde::{de::DeserializeOwned, Serialize}; - -/// Writes any serializable value to disk as pretty JSON. -pub(crate) fn save_json(path: impl AsRef, value: &T) -> anyhow::Result<()> { - let body = serde_json::to_string_pretty(value)?; - fs::write(path, body)?; - Ok(()) -} - -/// Loads any JSON value written by [`save_json`]. -pub(crate) fn load_json(path: impl AsRef) -> anyhow::Result { - let body = fs::read_to_string(path)?; - Ok(serde_json::from_str(&body)?) -} diff --git a/crates/dst/src/core/mod.rs b/crates/dst/src/core/mod.rs new file mode 100644 index 00000000000..177de8b6d4f --- /dev/null +++ b/crates/dst/src/core/mod.rs @@ -0,0 +1,77 @@ +//! Core abstractions for pluggable DST workloads, engines, and properties. + +use crate::{config::RunConfig, seed::DstSeed}; + +/// Pull-based deterministic interaction source. +pub trait NextInteractionSource { + type Interaction; + + fn next_interaction(&mut self) -> Option; + fn request_finish(&mut self); +} + +/// A workload plan executed on-demand through `next_interaction`. +pub trait WorkloadPlan { + type Interaction: Clone + Send + Sync + 'static; + fn next_interactions( + &self, + seed: DstSeed, + cfg: RunConfig, + ) -> Box>; +} + +/// Target execution contract over a workload interaction stream. 
+pub trait TargetEngine { + type Outcome; + type Error; + + fn execute_interaction(&mut self, interaction: &I) -> Result<(), Self::Error>; + fn finish(&mut self); + fn collect_outcome(&mut self) -> anyhow::Result; +} + +/// Target-owned property lifecycle hooks. +pub trait PropertySet { + type Error; + + fn on_interaction(&mut self, interaction: &I, step: usize) -> Result<(), Self::Error>; + fn on_finish(&mut self, outcome: &O) -> Result<(), Self::Error>; +} + +/// Shared streaming runner. +pub fn run_streaming( + mut source: S, + mut engine: E, + mut properties: P, + cfg: RunConfig, +) -> anyhow::Result +where + I: Clone, + S: NextInteractionSource, + E: TargetEngine, + P: PropertySet, +{ + let deadline = cfg.deadline(); + let mut step = 0usize; + loop { + if deadline.is_some_and(|d| std::time::Instant::now() >= d) { + source.request_finish(); + } + let Some(interaction) = source.next_interaction() else { + break; + }; + engine + .execute_interaction(&interaction) + .map_err(|e| anyhow::anyhow!("interaction execution failed at step {step}: {e}"))?; + properties + .on_interaction(&interaction, step) + .map_err(|e| anyhow::anyhow!("property failed at step {step}: {e}"))?; + step = step.saturating_add(1); + } + engine.finish(); + let outcome = engine.collect_outcome()?; + properties + .on_finish(&outcome) + .map_err(|e| anyhow::anyhow!("finish property failed: {e}"))?; + Ok(outcome) +} diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index be2e06062c1..6b481361b11 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -6,17 +6,14 @@ //! - [`seed`] for deterministic seeds, //! - [`workload`] for scenario identifiers, //! - [`targets`] for the executable datastore / relational-db adapters. -//! -//! The `spacetimedb-dst` binary drives those pieces through `run`, `replay`, -//! and `shrink`. -mod bugbase; /// Shared run-budget configuration for DST targets. pub mod config; +/// Core traits/runners for pluggable workloads and targets. 
+pub mod core; mod schema; /// Stable seed and RNG utilities used to make runs reproducible. pub mod seed; -mod shrink; /// Concrete simulator targets. pub mod targets; /// Shared workload generators reused by multiple targets. diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index 59a67394a87..b23b49c89ac 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -1,13 +1,12 @@ -use std::{ - path::{Path, PathBuf}, - time::{SystemTime, UNIX_EPOCH}, -}; +use std::time::{SystemTime, UNIX_EPOCH}; use clap::{Args, Parser, Subcommand, ValueEnum}; use spacetimedb_dst::{ config::RunConfig, seed::DstSeed, - targets::{datastore, relational_db, relational_db_commitlog}, + targets::descriptor::{ + DatastoreDescriptor, RelationalDbCommitlogDescriptor, RelationalDbDescriptor, TargetDescriptor, + }, workload::table_ops::TableScenarioId, }; @@ -22,8 +21,6 @@ struct Cli { #[derive(Subcommand, Debug)] enum Command { Run(RunArgs), - Replay(ReplayArgs), - Shrink(ShrinkArgs), } #[derive(Args, Debug, Clone)] @@ -44,24 +41,6 @@ struct RunArgs { duration: Option, #[arg(long)] max_interactions: Option, - #[arg(long)] - save_case: Option, -} - -#[derive(Args, Debug)] -struct ReplayArgs { - #[command(flatten)] - target: TargetArgs, - path: PathBuf, -} - -#[derive(Args, Debug)] -struct ShrinkArgs { - #[command(flatten)] - target: TargetArgs, - path: PathBuf, - #[arg(long)] - save_shrunk: Option, } #[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] @@ -92,8 +71,6 @@ fn main() -> anyhow::Result<()> { init_tracing(); match Cli::parse().command { Command::Run(args) => run_command(args), - Command::Replay(args) => replay_command(args), - Command::Shrink(args) => shrink_command(args), } } @@ -116,25 +93,9 @@ fn run_command(args: RunArgs) -> anyhow::Result<()> { let scenario = TableScenarioId::from(args.target.scenario); match args.target.target { - TargetKind::Datastore => run_datastore(seed, scenario, config, args.save_case), - TargetKind::RelationalDb => 
run_relational(seed, scenario, config, args.save_case), - TargetKind::RelationalDbCommitlog => run_relational_commitlog(seed, scenario, config, args.save_case), - } -} - -fn replay_command(args: ReplayArgs) -> anyhow::Result<()> { - match args.target.target { - TargetKind::Datastore => replay_datastore(&args.path), - TargetKind::RelationalDb => replay_relational(&args.path), - TargetKind::RelationalDbCommitlog => replay_relational_commitlog(&args.path), - } -} - -fn shrink_command(args: ShrinkArgs) -> anyhow::Result<()> { - match args.target.target { - TargetKind::Datastore => shrink_datastore(&args.path, args.save_shrunk.as_ref()), - TargetKind::RelationalDb => shrink_relational(&args.path, args.save_shrunk.as_ref()), - TargetKind::RelationalDbCommitlog => shrink_relational_commitlog(&args.path, args.save_shrunk.as_ref()), + TargetKind::Datastore => run_target::(seed, scenario, config), + TargetKind::RelationalDb => run_target::(seed, scenario, config), + TargetKind::RelationalDbCommitlog => run_target::(seed, scenario, config), } } @@ -160,170 +121,12 @@ fn build_config(duration: Option<&str>, max_interactions: Option) -> anyh } } -fn run_datastore( - seed: DstSeed, - scenario: TableScenarioId, - config: RunConfig, - save_case: Option, -) -> anyhow::Result<()> { - if save_case.is_some() { - anyhow::bail!("save-case is not supported in streaming run mode"); - } - let outcome = datastore::run_generated_with_config_and_scenario(seed, scenario, config)?; - println!( - "ok target=datastore seed={} tables={} row_counts={:?}", - seed.0, - outcome.final_rows.len(), - outcome.final_row_counts - ); - Ok(()) -} - -fn run_relational( +fn run_target>( seed: DstSeed, scenario: TableScenarioId, config: RunConfig, - save_case: Option, ) -> anyhow::Result<()> { - if save_case.is_some() { - anyhow::bail!("save-case is not supported in streaming run mode"); - } - let outcome = relational_db::run_generated_with_config_and_scenario(seed, scenario, config)?; - println!( - "ok 
target=relational_db seed={} tables={} row_counts={:?}", - seed.0, - outcome.final_rows.len(), - outcome.final_row_counts - ); + let line = D::run_streaming(seed, scenario, config)?; + println!("{line}"); Ok(()) } - -fn run_relational_commitlog( - seed: DstSeed, - scenario: TableScenarioId, - config: RunConfig, - save_case: Option, -) -> anyhow::Result<()> { - if save_case.is_some() { - anyhow::bail!("save-case is not supported in streaming run mode"); - } - let outcome = relational_db_commitlog::run_generated_with_config_and_scenario(seed, scenario, config)?; - println!( - "ok target=relational_db_commitlog seed={} steps={} durable_commits={} replay_tables={}", - seed.0, outcome.applied_steps, outcome.durable_commit_count, outcome.replay_table_count - ); - Ok(()) -} - -fn replay_datastore(path: &Path) -> anyhow::Result<()> { - let case = datastore::load_case(path)?; - replay_datastore_case(&case) -} - -fn replay_relational(path: &Path) -> anyhow::Result<()> { - let case = relational_db::load_case(path)?; - replay_relational_case(&case) -} - -fn replay_relational_commitlog(path: &Path) -> anyhow::Result<()> { - let case = relational_db_commitlog::load_case(path)?; - replay_relational_commitlog_case(&case) -} - -fn replay_datastore_case(case: &datastore::DatastoreSimulatorCase) -> anyhow::Result<()> { - match datastore::run_case_detailed(case) { - Ok(_) => { - println!( - "ok target=datastore seed={} steps={}", - case.seed.0, - case.interactions.len() - ); - Ok(()) - } - Err(failure) => { - println!( - "fail target=datastore seed={} step={} reason={}", - case.seed.0, failure.step_index, failure.reason - ); - anyhow::bail!("datastore case failed") - } - } -} - -fn replay_relational_case(case: &relational_db::RelationalDbSimulatorCase) -> anyhow::Result<()> { - match relational_db::run_case_detailed(case) { - Ok(_) => { - println!( - "ok target=relational_db seed={} steps={}", - case.seed.0, - case.interactions.len() - ); - Ok(()) - } - Err(failure) => { - println!( - 
"fail target=relational_db seed={} step={} reason={}", - case.seed.0, failure.step_index, failure.reason - ); - anyhow::bail!("relational_db case failed") - } - } -} - -fn replay_relational_commitlog_case(case: &relational_db_commitlog::RelationalDbCommitlogCase) -> anyhow::Result<()> { - match relational_db_commitlog::run_case_detailed(case) { - Ok(outcome) => { - println!( - "ok target=relational_db_commitlog seed={} steps={} durable_commits={} replay_tables={}", - case.seed.0, outcome.applied_steps, outcome.durable_commit_count, outcome.replay_table_count - ); - Ok(()) - } - Err(failure) => { - println!( - "fail target=relational_db_commitlog seed={} step={} reason={}", - case.seed.0, failure.step_index, failure.reason - ); - anyhow::bail!("relational_db_commitlog case failed") - } - } -} - -fn shrink_datastore(path: &Path, save_shrunk: Option<&PathBuf>) -> anyhow::Result<()> { - let case = datastore::load_case(path)?; - let failure = datastore::run_case_detailed(&case).expect_err("shrink needs failing datastore case"); - let shrunk = datastore::shrink_failure(&case, &failure)?; - let out = shrunk_path(path, save_shrunk); - datastore::save_case(&out, &shrunk)?; - println!("shrunk_case={}", out.display()); - Ok(()) -} - -fn shrink_relational(path: &Path, save_shrunk: Option<&PathBuf>) -> anyhow::Result<()> { - let case = relational_db::load_case(path)?; - let failure = relational_db::run_case_detailed(&case).expect_err("shrink needs failing relational_db case"); - let shrunk = relational_db::shrink_failure(&case, &failure)?; - let out = shrunk_path(path, save_shrunk); - relational_db::save_case(&out, &shrunk)?; - println!("shrunk_case={}", out.display()); - Ok(()) -} - -fn shrink_relational_commitlog(path: &Path, save_shrunk: Option<&PathBuf>) -> anyhow::Result<()> { - let case = relational_db_commitlog::load_case(path)?; - let failure = relational_db_commitlog::run_case_detailed(&case) - .expect_err("shrink needs failing relational_db_commitlog case"); - let 
shrunk = relational_db_commitlog::shrink_failure(&case, &failure)?; - let out = shrunk_path(path, save_shrunk); - relational_db_commitlog::save_case(&out, &shrunk)?; - println!("shrunk_case={}", out.display()); - Ok(()) -} - -fn shrunk_path(default_input: &Path, explicit: Option<&PathBuf>) -> PathBuf { - explicit.cloned().unwrap_or_else(|| { - let mut path = default_input.as_os_str().to_os_string(); - path.push(".shrunk.json"); - PathBuf::from(path) - }) -} diff --git a/crates/dst/src/shrink.rs b/crates/dst/src/shrink.rs deleted file mode 100644 index 6796c92c8fd..00000000000 --- a/crates/dst/src/shrink.rs +++ /dev/null @@ -1,36 +0,0 @@ -//! Generic shrinking helpers for deterministic targets. - -/// Generic remove-and-replay shrink loop. -pub(crate) fn shrink_by_removing( - case: &C, - target_failure: &F, - truncate: impl Fn(&C) -> C, - len: impl Fn(&C) -> usize, - remove: impl Fn(&C, usize) -> Option, - replay_failure: impl Fn(&C) -> anyhow::Result, - same_failure: impl Fn(&F, &F) -> bool, -) -> anyhow::Result -where - C: Clone, -{ - let mut shrunk = truncate(case); - - let mut changed = true; - while changed { - changed = false; - for idx in (0..len(&shrunk)).rev() { - let Some(candidate) = remove(&shrunk, idx) else { - continue; - }; - let Ok(candidate_failure) = replay_failure(&candidate) else { - continue; - }; - if same_failure(target_failure, &candidate_failure) { - shrunk = candidate; - changed = true; - } - } - } - - Ok(shrunk) -} diff --git a/crates/dst/src/targets/datastore.rs b/crates/dst/src/targets/datastore.rs index 6dc6d6d831d..a45adcd4e2a 100644 --- a/crates/dst/src/targets/datastore.rs +++ b/crates/dst/src/targets/datastore.rs @@ -1,6 +1,6 @@ //! Randomized datastore simulator target built on the shared table workload. 
-use std::{ops::Bound, path::Path}; +use std::ops::Bound; use spacetimedb_datastore::{ execution_context::Workload, @@ -30,14 +30,11 @@ use crate::{ properties::{self, TargetPropertyAccess, TargetPropertyState}, }, workload::table_ops::{ - ConnectionWriteState, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, TableWorkloadExecutionFailure, - TableWorkloadInteraction, TableWorkloadOutcome, + ConnectionWriteState, TableScenarioId, TableWorkloadEngine, TableWorkloadInteraction, TableWorkloadOutcome, }, }; -pub type DatastoreSimulatorCase = TableWorkloadCase; pub type DatastoreSimulatorOutcome = TableWorkloadOutcome; -pub type DatastoreExecutionFailure = TableWorkloadExecutionFailure; type Interaction = TableWorkloadInteraction; struct DatastoreTarget; @@ -45,25 +42,11 @@ struct DatastoreTarget; impl TableTargetHarness for DatastoreTarget { type Engine = DatastoreEngine; - fn connection_seed_discriminator() -> u64 { - 17 - } - fn build_engine(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { DatastoreEngine::new(schema, num_connections) } } -pub fn materialize_case(seed: DstSeed, scenario: TableScenarioId, max_interactions: usize) -> DatastoreSimulatorCase { - harness::materialize_case::(seed, scenario, max_interactions) -} - -pub fn run_case_detailed( - case: &DatastoreSimulatorCase, -) -> Result { - harness::run_case_detailed::(case) -} - pub fn run_generated_with_config_and_scenario( seed: DstSeed, scenario: TableScenarioId, @@ -72,21 +55,6 @@ pub fn run_generated_with_config_and_scenario( harness::run_generated_with_config_and_scenario::(seed, scenario, config) } -pub fn save_case(path: impl AsRef, case: &DatastoreSimulatorCase) -> anyhow::Result<()> { - harness::save_case(path, case) -} - -pub fn load_case(path: impl AsRef) -> anyhow::Result { - harness::load_case(path) -} - -pub fn shrink_failure( - case: &DatastoreSimulatorCase, - failure: &DatastoreExecutionFailure, -) -> anyhow::Result { - harness::shrink_failure::(case, failure) -} - 
/// Concrete datastore execution harness for the shared table workload. struct DatastoreEngine { schema: SchemaPlan, diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs new file mode 100644 index 00000000000..cb0cd315cb5 --- /dev/null +++ b/crates/dst/src/targets/descriptor.rs @@ -0,0 +1,70 @@ +//! Target descriptor layer used by the CLI. + +use crate::{config::RunConfig, seed::DstSeed, workload::table_ops::TableScenarioId}; + +/// Descriptor contract: CLI talks to this, not per-target ad hoc handlers. +pub trait TargetDescriptor { + const NAME: &'static str; + type Scenario; + + fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> anyhow::Result; +} + +pub struct DatastoreDescriptor; + +impl TargetDescriptor for DatastoreDescriptor { + const NAME: &'static str = "datastore"; + type Scenario = TableScenarioId; + + fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> anyhow::Result { + let outcome = crate::targets::datastore::run_generated_with_config_and_scenario(seed, scenario, config)?; + Ok(format!( + "ok target={} seed={} tables={} row_counts={:?}", + Self::NAME, + seed.0, + outcome.final_rows.len(), + outcome.final_row_counts + )) + } + +} + +pub struct RelationalDbDescriptor; + +impl TargetDescriptor for RelationalDbDescriptor { + const NAME: &'static str = "relational_db"; + type Scenario = TableScenarioId; + + fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> anyhow::Result { + let outcome = crate::targets::relational_db::run_generated_with_config_and_scenario(seed, scenario, config)?; + Ok(format!( + "ok target={} seed={} tables={} row_counts={:?}", + Self::NAME, + seed.0, + outcome.final_rows.len(), + outcome.final_row_counts + )) + } + +} + +pub struct RelationalDbCommitlogDescriptor; + +impl TargetDescriptor for RelationalDbCommitlogDescriptor { + const NAME: &'static str = "relational_db_commitlog"; + type Scenario = TableScenarioId; 
+ + fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> anyhow::Result { + let outcome = + crate::targets::relational_db_commitlog::run_generated_with_config_and_scenario(seed, scenario, config)?; + Ok(format!( + "ok target={} seed={} steps={} durable_commits={} replay_tables={}", + Self::NAME, + seed.0, + outcome.applied_steps, + outcome.durable_commit_count, + outcome.replay_table_count + )) + } + +} diff --git a/crates/dst/src/targets/harness.rs b/crates/dst/src/targets/harness.rs index 39d29b5483a..5cafd9bbfcf 100644 --- a/crates/dst/src/targets/harness.rs +++ b/crates/dst/src/targets/harness.rs @@ -1,61 +1,14 @@ -use std::path::Path; - use crate::{ - bugbase::{load_json, save_json}, config::RunConfig, schema::SchemaPlan, seed::DstSeed, - shrink::shrink_by_removing, - workload::table_ops::{ - execute_interactions, run_generated_with_engine, InteractionStream, TableScenario, TableScenarioId, - TableWorkloadCase, TableWorkloadEngine, TableWorkloadExecutionFailure, TableWorkloadOutcome, - }, + workload::table_ops::{run_generated_with_engine, TableScenarioId, TableWorkloadEngine, TableWorkloadOutcome}, }; pub(crate) trait TableTargetHarness { type Engine: TableWorkloadEngine; - fn connection_seed_discriminator() -> u64; fn build_engine(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result; - - fn can_remove_interaction(interaction: &crate::workload::table_ops::TableWorkloadInteraction) -> bool { - !matches!( - interaction, - crate::workload::table_ops::TableWorkloadInteraction::CommitTx { .. } - | crate::workload::table_ops::TableWorkloadInteraction::RollbackTx { .. 
} - ) - } -} - -pub(crate) fn materialize_case( - seed: DstSeed, - scenario: TableScenarioId, - max_interactions: usize, -) -> TableWorkloadCase { - let mut rng = seed.fork(T::connection_seed_discriminator()).rng(); - let num_connections = rng.index(3) + 1; - let schema = scenario.generate_schema(&mut rng); - let interactions = - InteractionStream::new(seed, scenario, schema.clone(), num_connections, max_interactions).collect(); - TableWorkloadCase { - seed, - scenario, - num_connections, - schema, - interactions, - } -} - -pub(crate) fn run_case_detailed( - case: &TableWorkloadCase, -) -> Result { - execute_interactions( - &case.scenario, - &case.schema, - case.num_connections, - case.interactions.clone(), - T::build_engine, - ) } pub(crate) fn run_generated_with_config_and_scenario( @@ -65,47 +18,3 @@ pub(crate) fn run_generated_with_config_and_scenario( ) -> anyhow::Result { run_generated_with_engine(seed, scenario, config, T::build_engine) } - -pub(crate) fn save_case(path: impl AsRef, case: &TableWorkloadCase) -> anyhow::Result<()> { - save_json(path, case) -} - -pub(crate) fn load_case(path: impl AsRef) -> anyhow::Result { - load_json(path) -} - -pub(crate) fn shrink_failure( - case: &TableWorkloadCase, - failure: &TableWorkloadExecutionFailure, -) -> anyhow::Result { - shrink_by_removing( - case, - failure, - |case| { - let mut shrunk = case.clone(); - shrunk.interactions.truncate(failure.step_index.saturating_add(1)); - shrunk - }, - |case| case.interactions.len(), - |case, idx| { - let interaction = case.interactions.get(idx)?; - if !T::can_remove_interaction(interaction) { - return None; - } - let mut interactions = case.interactions.clone(); - interactions.remove(idx); - Some(TableWorkloadCase { - seed: case.seed, - scenario: case.scenario, - num_connections: case.num_connections, - schema: case.schema.clone(), - interactions, - }) - }, - |case| match run_case_detailed::(case) { - Ok(_) => anyhow::bail!("case did not fail"), - Err(failure) => 
Ok(failure), - }, - |expected, candidate| expected.reason == candidate.reason, - ) -} diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index 52967c78b01..67376f8bbf9 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -1,6 +1,7 @@ //! Concrete simulation targets. pub mod datastore; +pub mod descriptor; pub(crate) mod harness; pub(crate) mod properties; pub mod relational_db; diff --git a/crates/dst/src/targets/relational_db.rs b/crates/dst/src/targets/relational_db.rs index dff2a317b10..864196e3df8 100644 --- a/crates/dst/src/targets/relational_db.rs +++ b/crates/dst/src/targets/relational_db.rs @@ -1,6 +1,6 @@ //! Basic RelationalDB simulator target using the shared table workload. -use std::{ops::Bound, path::Path}; +use std::ops::Bound; use spacetimedb_core::{ db::relational_db::{MutTx as RelMutTx, RelationalDB}, @@ -33,14 +33,11 @@ use crate::{ properties::{self, TargetPropertyAccess, TargetPropertyState}, }, workload::table_ops::{ - ConnectionWriteState, TableScenarioId, TableWorkloadCase, TableWorkloadEngine, TableWorkloadExecutionFailure, - TableWorkloadInteraction, TableWorkloadOutcome, + ConnectionWriteState, TableScenarioId, TableWorkloadEngine, TableWorkloadInteraction, TableWorkloadOutcome, }, }; -pub type RelationalDbSimulatorCase = TableWorkloadCase; pub type RelationalDbSimulatorOutcome = TableWorkloadOutcome; -pub type RelationalDbExecutionFailure = TableWorkloadExecutionFailure; type RelationalDbInteraction = TableWorkloadInteraction; struct RelationalDbTarget; @@ -48,29 +45,11 @@ struct RelationalDbTarget; impl TableTargetHarness for RelationalDbTarget { type Engine = RelationalDbEngine; - fn connection_seed_discriminator() -> u64 { - 31 - } - fn build_engine(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { RelationalDbEngine::new(schema, num_connections) } } -pub fn materialize_case( - seed: DstSeed, - scenario: TableScenarioId, - max_interactions: usize, -) -> 
RelationalDbSimulatorCase { - harness::materialize_case::(seed, scenario, max_interactions) -} - -pub fn run_case_detailed( - case: &RelationalDbSimulatorCase, -) -> Result { - harness::run_case_detailed::(case) -} - pub fn run_generated_with_config_and_scenario( seed: DstSeed, scenario: TableScenarioId, @@ -79,21 +58,6 @@ pub fn run_generated_with_config_and_scenario( harness::run_generated_with_config_and_scenario::(seed, scenario, config) } -pub fn save_case(path: impl AsRef, case: &RelationalDbSimulatorCase) -> anyhow::Result<()> { - harness::save_case(path, case) -} - -pub fn load_case(path: impl AsRef) -> anyhow::Result { - harness::load_case(path) -} - -pub fn shrink_failure( - case: &RelationalDbSimulatorCase, - failure: &RelationalDbExecutionFailure, -) -> anyhow::Result { - harness::shrink_failure::(case, failure) -} - /// Concrete `RelationalDB` execution harness for the shared table workload. struct RelationalDbEngine { schema: SchemaPlan, diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 29a93dc7f5d..54cc26d2ecb 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -2,7 +2,6 @@ use std::{ collections::{BTreeMap, HashMap}, - path::Path, time::Instant, }; @@ -31,79 +30,18 @@ use spacetimedb_table::page_pool::PagePool; use tracing::{debug, info, trace, warn}; use crate::{ - bugbase::{load_json, save_json}, config::RunConfig, + core::NextInteractionSource, schema::{SchemaPlan, SimRow}, seed::{DstRng, DstSeed}, - shrink::shrink_by_removing, workload::{ - commitlog_ops::{ - materialize_case, CommitlogInteraction, CommitlogWorkloadCase, CommitlogWorkloadFailure, - CommitlogWorkloadOutcome, - }, + commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome}, table_ops::{ConnectionWriteState, TableScenario, TableScenarioId, TableWorkloadInteraction}, }, }; -pub type RelationalDbCommitlogCase = CommitlogWorkloadCase; -pub 
type RelationalDbCommitlogFailure = CommitlogWorkloadFailure; pub type RelationalDbCommitlogOutcome = CommitlogWorkloadOutcome; -pub fn materialize_case_for_target( - seed: DstSeed, - scenario: TableScenarioId, - max_interactions: usize, -) -> RelationalDbCommitlogCase { - materialize_case(seed, scenario, max_interactions) -} - -pub fn save_case(path: impl AsRef, case: &RelationalDbCommitlogCase) -> anyhow::Result<()> { - save_json(path, case) -} - -pub fn load_case(path: impl AsRef) -> anyhow::Result { - load_json(path) -} - -pub fn run_case_detailed( - case: &RelationalDbCommitlogCase, -) -> Result { - info!( - "relational_db_commitlog start seed={} scenario={:?} interactions={} connections={}", - case.seed.0, - case.scenario, - case.interactions.len(), - case.num_connections - ); - let mut engine = RelationalDbCommitlogEngine::new(case.seed, &case.schema, case.num_connections) - .map_err(|err| failure_without_step(format!("bootstrap failed: {err}")))?; - - for (step_index, interaction) in case.interactions.iter().enumerate() { - trace!(step_index, ?interaction, "interaction"); - if let Err(reason) = engine.execute(interaction) { - engine.finish(); - warn!(step_index, %reason, "interaction failed"); - return Err(RelationalDbCommitlogFailure { - step_index, - reason, - interaction: Some(interaction.clone()), - }); - } - } - - let outcome = engine - .collect_outcome() - .map_err(|err| failure_without_step(err.to_string()))?; - engine.finish(); - info!( - applied_steps = outcome.applied_steps, - durable_commit_count = outcome.durable_commit_count, - replay_table_count = outcome.replay_table_count, - "relational_db_commitlog complete" - ); - Ok(outcome) -} - pub fn run_generated_with_config_and_scenario( seed: DstSeed, scenario: TableScenarioId, @@ -113,7 +51,7 @@ pub fn run_generated_with_config_and_scenario( let num_connections = connection_rng.index(3) + 1; let mut schema_rng = seed.fork(122).rng(); let schema = scenario.generate_schema(&mut schema_rng); - let mut 
stream = crate::workload::commitlog_ops::InteractionStream::new( + let mut generator = crate::workload::commitlog_ops::NextInteractionGeneratorComposite::new( seed, scenario, schema.clone(), @@ -126,9 +64,9 @@ pub fn run_generated_with_config_and_scenario( loop { if deadline.is_some_and(|deadline| Instant::now() >= deadline) { - stream.request_finish(); + generator.request_finish(); } - let Some(interaction) = stream.next() else { + let Some(interaction) = generator.next_interaction() else { break; }; trace!(step_index, ?interaction, "streaming interaction"); @@ -140,53 +78,15 @@ pub fn run_generated_with_config_and_scenario( let outcome = engine.collect_outcome().map_err(anyhow::Error::msg)?; engine.finish(); + info!( + applied_steps = outcome.applied_steps, + durable_commit_count = outcome.durable_commit_count, + replay_table_count = outcome.replay_table_count, + "relational_db_commitlog complete" + ); Ok(outcome) } -pub fn shrink_failure( - case: &RelationalDbCommitlogCase, - failure: &RelationalDbCommitlogFailure, -) -> anyhow::Result { - shrink_by_removing( - case, - failure, - |case| { - let mut shrunk = case.clone(); - shrunk.interactions.truncate(failure.step_index.saturating_add(1)); - shrunk - }, - |case| case.interactions.len(), - |case, idx| { - let interaction = case.interactions.get(idx)?; - if !can_remove_interaction(interaction) { - return None; - } - let mut interactions = case.interactions.clone(); - interactions.remove(idx); - Some(RelationalDbCommitlogCase { - seed: case.seed, - scenario: case.scenario, - num_connections: case.num_connections, - schema: case.schema.clone(), - interactions, - }) - }, - |case| match run_case_detailed(case) { - Ok(_) => anyhow::bail!("case did not fail"), - Err(failure) => Ok(failure), - }, - |expected, candidate| expected.reason == candidate.reason, - ) -} - -fn can_remove_interaction(interaction: &CommitlogInteraction) -> bool { - match interaction { - 
CommitlogInteraction::Table(TableWorkloadInteraction::CommitTx { .. }) - | CommitlogInteraction::Table(TableWorkloadInteraction::RollbackTx { .. }) => false, - _ => true, - } -} - #[derive(Clone, Debug)] struct DynamicTableState { version: u32, @@ -746,14 +646,6 @@ fn dynamic_schema(name: &str, version: u32) -> TableSchema { ) } -fn failure_without_step(reason: String) -> RelationalDbCommitlogFailure { - RelationalDbCommitlogFailure { - step_index: usize::MAX, - reason, - interaction: None, - } -} - fn encode_txdata_for_commitlog(tx_data: &DatastoreTxData) -> Option { let _tx_offset = tx_data.tx_offset()?; let mut inserts: Box<_> = tx_data diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs index d703f5b3288..61ca39fe1c3 100644 --- a/crates/dst/src/workload/commitlog_ops/generation.rs +++ b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -3,17 +3,18 @@ use std::collections::{BTreeSet, VecDeque}; use crate::{ + core::NextInteractionSource, schema::SchemaPlan, seed::{DstRng, DstSeed}, workload::{ - commitlog_ops::{CommitlogInteraction, CommitlogWorkloadCase}, - table_ops::{self, TableScenario, TableScenarioId}, + commitlog_ops::CommitlogInteraction, + table_ops::{NextInteractionGenerator, TableScenario}, }, }; /// Streaming composite interaction source for commitlog-oriented targets. 
-pub(crate) struct InteractionStream { - base: table_ops::InteractionStream, +pub(crate) struct NextInteractionGeneratorComposite { + base: NextInteractionGenerator, rng: DstRng, num_connections: usize, next_slot: u32, @@ -21,7 +22,7 @@ pub(crate) struct InteractionStream { pending: VecDeque, } -impl InteractionStream { +impl NextInteractionGeneratorComposite { pub fn new( seed: DstSeed, scenario: S, @@ -30,7 +31,7 @@ impl InteractionStream { target_interactions: usize, ) -> Self { Self { - base: table_ops::InteractionStream::new(seed.fork(123), scenario, schema, num_connections, target_interactions), + base: NextInteractionGenerator::new(seed.fork(123), scenario, schema, num_connections, target_interactions), rng: seed.fork(124).rng(), num_connections, next_slot: 0, @@ -58,7 +59,8 @@ impl InteractionStream { let slot = self.next_slot; self.next_slot = self.next_slot.saturating_add(1); self.alive_slots.insert(slot); - self.pending.push_back(CommitlogInteraction::CreateDynamicTable { conn, slot }); + self.pending + .push_back(CommitlogInteraction::CreateDynamicTable { conn, slot }); return true; } @@ -70,7 +72,8 @@ impl InteractionStream { .iter() .nth(idx) .expect("slot index within alive set bounds"); - self.pending.push_back(CommitlogInteraction::MigrateDynamicTable { conn, slot }); + self.pending + .push_back(CommitlogInteraction::MigrateDynamicTable { conn, slot }); } if !self.alive_slots.is_empty() && self.rng.index(100) < 5 { @@ -82,17 +85,16 @@ impl InteractionStream { .nth(idx) .expect("slot index within alive set bounds"); self.alive_slots.remove(&slot); - self.pending.push_back(CommitlogInteraction::DropDynamicTable { conn, slot }); + self.pending + .push_back(CommitlogInteraction::DropDynamicTable { conn, slot }); } true } } -impl Iterator for InteractionStream { - type Item = CommitlogInteraction; - - fn next(&mut self) -> Option { +impl NextInteractionGeneratorComposite { + pub fn pull_next_interaction(&mut self) -> Option { loop { if let Some(next) = 
self.pending.pop_front() { return Some(next); @@ -104,28 +106,22 @@ impl Iterator for InteractionStream { } } -pub(crate) fn materialize_case( - seed: DstSeed, - scenario: TableScenarioId, - max_interactions: usize, -) -> CommitlogWorkloadCase { - let mut connection_rng = seed.fork(121).rng(); - let num_connections = connection_rng.index(3) + 1; - let mut schema_rng = seed.fork(122).rng(); - let schema = scenario.generate_schema(&mut schema_rng); - let interactions = InteractionStream::new(seed, scenario, schema.clone(), num_connections, max_interactions) - .collect::>(); - - CommitlogWorkloadCase { - seed, - scenario, - num_connections, - schema, - interactions, +impl NextInteractionSource for NextInteractionGeneratorComposite { + type Interaction = CommitlogInteraction; + + fn next_interaction(&mut self) -> Option { + self.pull_next_interaction() + } + + fn request_finish(&mut self) { + Self::request_finish(self); } } -#[allow(dead_code)] -pub(crate) fn base_schema(case: &CommitlogWorkloadCase) -> &SchemaPlan { - &case.schema +impl Iterator for NextInteractionGeneratorComposite { + type Item = CommitlogInteraction; + + fn next(&mut self) -> Option { + self.pull_next_interaction() + } } diff --git a/crates/dst/src/workload/commitlog_ops/mod.rs b/crates/dst/src/workload/commitlog_ops/mod.rs index c6960b5c7f4..8c8191f7489 100644 --- a/crates/dst/src/workload/commitlog_ops/mod.rs +++ b/crates/dst/src/workload/commitlog_ops/mod.rs @@ -3,5 +3,5 @@ mod generation; mod types; -pub(crate) use generation::{materialize_case, InteractionStream}; -pub use types::{CommitlogInteraction, CommitlogWorkloadCase, CommitlogWorkloadFailure, CommitlogWorkloadOutcome}; +pub(crate) use generation::NextInteractionGeneratorComposite; +pub use types::{CommitlogInteraction, CommitlogWorkloadOutcome}; diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs index d4c45ba66e8..b473d554049 100644 --- 
a/crates/dst/src/workload/commitlog_ops/types.rs +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; -use crate::{schema::SchemaPlan, seed::DstSeed, workload::table_ops::TableWorkloadInteraction}; +use crate::workload::table_ops::TableWorkloadInteraction; /// One interaction in the commitlog-oriented mixed workload. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] @@ -19,16 +19,6 @@ pub enum CommitlogInteraction { ChaosSync, } -/// Materialized case for deterministic replay and shrinking. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct CommitlogWorkloadCase { - pub seed: DstSeed, - pub scenario: crate::workload::table_ops::TableScenarioId, - pub num_connections: usize, - pub schema: SchemaPlan, - pub interactions: Vec, -} - /// Successful run summary for commitlog target. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct CommitlogWorkloadOutcome { @@ -36,11 +26,3 @@ pub struct CommitlogWorkloadOutcome { pub durable_commit_count: usize, pub replay_table_count: usize, } - -/// Failure info for commitlog target execution. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct CommitlogWorkloadFailure { - pub step_index: usize, - pub reason: String, - pub interaction: Option, -} diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index aff08df65c3..8e6fb110bbb 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -1,6 +1,7 @@ use std::collections::VecDeque; use crate::{ + core::NextInteractionSource, schema::SchemaPlan, seed::{DstRng, DstSeed}, }; @@ -13,7 +14,7 @@ use super::{model::GenerationModel, TableScenario, TableWorkloadInteraction}; /// duration runs do not need to materialize the full interaction list in /// memory up front. 
#[derive(Clone, Debug)] -pub struct InteractionStream { +pub struct NextInteractionGenerator { // Deterministic source for all planner choices. rng: DstRng, // Scenario-specific workload policy layered on top of the shared model. @@ -103,7 +104,7 @@ impl<'a> ScenarioPlanner<'a> { } } -impl InteractionStream { +impl NextInteractionGenerator { pub fn new( seed: DstSeed, scenario: S, @@ -161,10 +162,8 @@ impl InteractionStream { } } -impl Iterator for InteractionStream { - type Item = TableWorkloadInteraction; - - fn next(&mut self) -> Option { +impl NextInteractionGenerator { + pub fn pull_next_interaction(&mut self) -> Option { loop { // Scenario planning fills `pending` in bursts, but the iterator // surface stays one interaction at a time. @@ -181,3 +180,23 @@ impl Iterator for InteractionStream { } } } + +impl NextInteractionSource for NextInteractionGenerator { + type Interaction = TableWorkloadInteraction; + + fn next_interaction(&mut self) -> Option { + self.pull_next_interaction() + } + + fn request_finish(&mut self) { + Self::request_finish(self); + } +} + +impl Iterator for NextInteractionGenerator { + type Item = TableWorkloadInteraction; + + fn next(&mut self) -> Option { + self.pull_next_interaction() + } +} diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs index c25cb02895e..a3942348df1 100644 --- a/crates/dst/src/workload/table_ops/mod.rs +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -6,8 +6,8 @@ mod runner; mod scenarios; mod types; -pub(crate) use generation::InteractionStream; -pub(crate) use runner::{execute_interactions, run_generated_with_engine}; +pub(crate) use generation::NextInteractionGenerator; +pub(crate) use runner::run_generated_with_engine; pub use scenarios::TableScenarioId; pub(crate) use types::{ConnectionWriteState, TableScenario, TableWorkloadEngine}; -pub use types::{TableWorkloadCase, TableWorkloadExecutionFailure, TableWorkloadInteraction, TableWorkloadOutcome}; +pub use 
types::{TableWorkloadInteraction, TableWorkloadOutcome}; diff --git a/crates/dst/src/workload/table_ops/runner.rs b/crates/dst/src/workload/table_ops/runner.rs index d17846be351..c77c90221fc 100644 --- a/crates/dst/src/workload/table_ops/runner.rs +++ b/crates/dst/src/workload/table_ops/runner.rs @@ -1,56 +1,52 @@ -use std::time::Instant; - -use crate::{config::RunConfig, schema::SchemaPlan, seed::DstSeed}; +use crate::{ + config::RunConfig, + core::{self, PropertySet}, + schema::SchemaPlan, + seed::DstSeed, +}; use super::{ - model::ExpectedModel, InteractionStream, TableScenario, TableWorkloadEngine, TableWorkloadExecutionFailure, - TableWorkloadInteraction, TableWorkloadOutcome, + model::ExpectedModel, NextInteractionGenerator, TableScenario, TableWorkloadEngine, TableWorkloadInteraction, + TableWorkloadOutcome, }; -pub fn execute_interactions( - scenario: &S, - schema: &SchemaPlan, - num_connections: usize, - interactions: I, - make_engine: impl FnOnce(&SchemaPlan, usize) -> anyhow::Result, -) -> Result -where - S: TableScenario, - E: TableWorkloadEngine, - I: IntoIterator, -{ - let mut engine = - make_engine(schema, num_connections).map_err(|err| failure_without_step(format!("bootstrap failed: {err}")))?; - let mut expected = ExpectedModel::new(schema.tables.len(), num_connections); +struct TablePropertyRuntime { + scenario: S, + schema: SchemaPlan, + expected: ExpectedModel, +} - for (step_index, interaction) in interactions.into_iter().enumerate() { - engine - .execute(&interaction) - .map_err(|reason| TableWorkloadExecutionFailure { - step_index, - reason, - interaction: Some(interaction.clone()), - })?; - expected.apply(&interaction); +impl TablePropertyRuntime { + fn new(scenario: S, schema: SchemaPlan, num_connections: usize) -> Self { + let table_count = schema.tables.len(); + Self { + scenario, + schema, + expected: ExpectedModel::new(table_count, num_connections), + } } +} - engine.finish(); - let outcome = engine - .collect_outcome() - .map_err(|err| 
failure_without_step(format!("collect outcome failed: {err}")))?; - let expected_rows = expected.committed_rows(); - if outcome.final_rows != expected_rows { - return Err(failure_without_step(format!( - "final datastore state mismatch: expected={expected_rows:?} actual={:?}", - outcome.final_rows - ))); - } +impl PropertySet for TablePropertyRuntime { + type Error = String; - scenario - .validate_outcome(schema, &outcome) - .map_err(|err| failure_without_step(format!("scenario invariant failed: {err}")))?; + fn on_interaction(&mut self, interaction: &TableWorkloadInteraction, _step: usize) -> Result<(), Self::Error> { + self.expected.apply(interaction); + Ok(()) + } - Ok(outcome) + fn on_finish(&mut self, outcome: &TableWorkloadOutcome) -> Result<(), Self::Error> { + let expected_rows = self.expected.clone().committed_rows(); + if outcome.final_rows != expected_rows { + return Err(format!( + "final datastore state mismatch: expected={expected_rows:?} actual={:?}", + outcome.final_rows + )); + } + self.scenario + .validate_outcome(&self.schema, outcome) + .map_err(|err| format!("scenario invariant failed: {err}")) + } } pub fn run_generated_with_engine( @@ -66,50 +62,14 @@ where let mut rng = seed.fork(17).rng(); let num_connections = rng.index(3) + 1; let schema = scenario.generate_schema(&mut rng); - let mut stream = InteractionStream::new( + let generator = NextInteractionGenerator::new( seed, scenario.clone(), schema.clone(), num_connections, config.max_interactions_or_default(usize::MAX), ); - let mut engine = make_engine(&schema, num_connections)?; - let mut expected = ExpectedModel::new(schema.tables.len(), num_connections); - let deadline = config.deadline(); - - let mut step_index = 0usize; - loop { - if deadline.is_some_and(|deadline| Instant::now() >= deadline) { - stream.request_finish(); - } - - let Some(interaction) = stream.next() else { - break; - }; - engine - .execute(&interaction) - .map_err(|reason| anyhow::anyhow!("workload failed at step 
{step_index}: {reason}"))?; - expected.apply(&interaction); - step_index = step_index.saturating_add(1); - } - - engine.finish(); - let outcome = engine.collect_outcome()?; - let expected_rows = expected.committed_rows(); - if outcome.final_rows != expected_rows { - anyhow::bail!( - "final datastore state mismatch: expected={expected_rows:?} actual={:?}", - outcome.final_rows - ); - } - scenario.validate_outcome(&schema, &outcome)?; - Ok(outcome) -} - -fn failure_without_step(reason: String) -> TableWorkloadExecutionFailure { - TableWorkloadExecutionFailure { - step_index: usize::MAX, - reason, - interaction: None, - } + let engine = make_engine(&schema, num_connections)?; + let properties = TablePropertyRuntime::new(scenario, schema, num_connections); + core::run_streaming(generator, engine, properties, config) } diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index d4ac0dd726f..3bc568b2bf2 100644 --- a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -1,11 +1,12 @@ use serde::{Deserialize, Serialize}; use crate::{ + core::TargetEngine, schema::{SchemaPlan, SimRow}, seed::DstRng, }; -use super::{generation::ScenarioPlanner, scenarios::TableScenarioId}; +use super::generation::ScenarioPlanner; /// Scenario hook for shared table-oriented workloads. /// @@ -17,21 +18,6 @@ pub(crate) trait TableScenario: Clone { fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize); } -/// Materialized shared table-workload case reused by multiple targets. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct TableWorkloadCase { - /// Seed used to derive schema and workload decisions. - pub seed: crate::seed::DstSeed, - /// Shared workload scenario identifier. - pub(crate) scenario: TableScenarioId, - /// Number of simulated client connections in the run. 
- pub(crate) num_connections: usize, - /// Initial schema installed into target before replaying interactions. - pub(crate) schema: SchemaPlan, - /// Materialized interaction trace for replay and shrinking. - pub interactions: Vec, -} - /// One generated workload step. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub enum TableWorkloadInteraction { @@ -51,17 +37,6 @@ pub struct TableWorkloadOutcome { pub final_rows: Vec>, } -/// First failing interaction observed while executing a generated workload. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct TableWorkloadExecutionFailure { - /// Zero-based position of the failing interaction. - pub step_index: usize, - /// Target-provided error message. - pub reason: String, - /// Interaction that triggered the failure. - pub(crate) interaction: Option, -} - /// Minimal engine interface implemented by concrete table-oriented targets. pub(crate) trait TableWorkloadEngine { fn execute(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), String>; @@ -69,6 +44,26 @@ pub(crate) trait TableWorkloadEngine { fn finish(&mut self); } +impl TargetEngine for T +where + T: TableWorkloadEngine, +{ + type Outcome = TableWorkloadOutcome; + type Error = String; + + fn execute_interaction(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), Self::Error> { + self.execute(interaction) + } + + fn finish(&mut self) { + TableWorkloadEngine::finish(self); + } + + fn collect_outcome(&mut self) -> anyhow::Result { + TableWorkloadEngine::collect_outcome(self) + } +} + /// Per-connection write transaction bookkeeping shared by locking targets. pub(crate) struct ConnectionWriteState { /// Open mutable transaction handle for each simulated connection. 
From dd33707b35c560cd763a26e68e705543d67f0105 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 23 Apr 2026 17:34:09 +0530 Subject: [PATCH 12/74] pluggable property --- crates/dst/src/targets/datastore.rs | 32 +- crates/dst/src/targets/properties.rs | 552 +++++++++++------- crates/dst/src/targets/relational_db.rs | 32 +- .../src/targets/relational_db_commitlog.rs | 120 +++- 4 files changed, 467 insertions(+), 269 deletions(-) diff --git a/crates/dst/src/targets/datastore.rs b/crates/dst/src/targets/datastore.rs index a45adcd4e2a..7fed0ff5d7c 100644 --- a/crates/dst/src/targets/datastore.rs +++ b/crates/dst/src/targets/datastore.rs @@ -27,7 +27,7 @@ use crate::{ seed::DstSeed, targets::{ harness::{self, TableTargetHarness}, - properties::{self, TargetPropertyAccess, TargetPropertyState}, + properties::{PropertyRuntime, TargetPropertyAccess}, }, workload::table_ops::{ ConnectionWriteState, TableScenarioId, TableWorkloadEngine, TableWorkloadInteraction, TableWorkloadOutcome, @@ -61,7 +61,7 @@ struct DatastoreEngine { datastore: Locking, table_ids: Vec, execution: ConnectionWriteState, - properties: TargetPropertyState, + properties: PropertyRuntime, step: u64, } @@ -74,7 +74,7 @@ impl DatastoreEngine { datastore, table_ids, execution: ConnectionWriteState::new(num_connections), - properties: TargetPropertyState::default(), + properties: PropertyRuntime::default(), step: 0, }) } @@ -202,13 +202,13 @@ impl DatastoreEngine { .map_err(|err| format!("range scan failed: {err}")) } - fn with_property_state( + fn with_property_runtime( &mut self, - f: impl FnOnce(&TargetPropertyState, &Self) -> Result, + f: impl FnOnce(&mut PropertyRuntime, &Self) -> Result, ) -> Result { - let state = std::mem::take(&mut self.properties); - let result = f(&state, self); - self.properties = state; + let mut runtime = std::mem::take(&mut self.properties); + let result = f(&mut runtime, self); + self.properties = runtime; result } } @@ -274,7 +274,9 @@ impl TableWorkloadEngine for 
DatastoreEngine { .commit_mut_tx(tx) .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; self.execution.active_writer = None; - self.with_property_state(|state, access| properties::on_commit_or_rollback(state, access))?; + self.with_property_runtime(|runtime, access| { + runtime.on_commit_or_rollback(access) + })?; } Interaction::RollbackTx { conn } => { self.execution.ensure_writer_owner(*conn, "rollback")?; @@ -283,7 +285,9 @@ impl TableWorkloadEngine for DatastoreEngine { .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; let _ = self.datastore.rollback_mut_tx(tx); self.execution.active_writer = None; - self.with_property_state(|state, access| properties::on_commit_or_rollback(state, access))?; + self.with_property_runtime(|runtime, access| { + runtime.on_commit_or_rollback(access) + })?; } Interaction::Insert { conn, table, row } => { let in_tx = self.execution.tx_by_connection[*conn].is_some(); @@ -295,8 +299,8 @@ impl TableWorkloadEngine for DatastoreEngine { Ok(()) })?; let step = self.step; - self.with_property_state(|state, access| { - properties::on_insert(state, access, step, *conn, *table, row, in_tx) + self.with_property_runtime(|runtime, access| { + runtime.on_insert(access, step, *conn, *table, row, in_tx) })?; } Interaction::Delete { conn, table, row } => { @@ -309,8 +313,8 @@ impl TableWorkloadEngine for DatastoreEngine { Ok(()) })?; let step = self.step; - self.with_property_state(|state, access| { - properties::on_delete(state, access, step, *conn, *table, row, in_tx) + self.with_property_runtime(|runtime, access| { + runtime.on_delete(access, step, *conn, *table, row, in_tx) })?; } } diff --git a/crates/dst/src/targets/properties.rs b/crates/dst/src/targets/properties.rs index 5bf16bda63b..66b41b4354f 100644 --- a/crates/dst/src/targets/properties.rs +++ b/crates/dst/src/targets/properties.rs @@ -1,7 +1,7 @@ //! Target-level property runtime shared by datastore-oriented targets. //! -//! 
Properties are owned by targets (not workload generation). This keeps workloads as pure -//! operation streams and lets each target decide when and how to validate invariants. +//! Properties are defined once here and plugged into any target that +//! implements [`TargetPropertyAccess`]. use std::ops::Bound; @@ -9,17 +9,6 @@ use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; use crate::schema::{SchemaPlan, SimRow}; -/// Property types supported by target execution. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(crate) enum TargetProperty { - InsertSelect, - DeleteSelect, - SelectSelectOptimizer, - WhereTrueFalseNull, - IndexRangeExcluded, - BankingTablesMatch, -} - /// Target adapter for property evaluation. pub(crate) trait TargetPropertyAccess { fn schema_plan(&self) -> &SchemaPlan; @@ -36,264 +25,383 @@ pub(crate) trait TargetPropertyAccess { ) -> Result, String>; } -/// Mutable runtime state for target-owned properties. -/// -/// This is intentionally small today, but it is the anchor for adding stateful -/// properties later (history windows, cross-step state, learned predicates, etc). -#[derive(Debug, Clone)] -pub(crate) struct TargetPropertyState { - periodic_every: u64, - enabled: Vec, +/// Canonical property IDs that can be selected by targets. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum PropertyKind { + InsertSelect, + DeleteSelect, + SelectSelectOptimizer, + WhereTrueFalseNull, + IndexRangeExcluded, + BankingTablesMatch, } -impl Default for TargetPropertyState { - fn default() -> Self { - Self { - periodic_every: 8, - enabled: vec![ - TargetProperty::InsertSelect, - TargetProperty::DeleteSelect, - TargetProperty::SelectSelectOptimizer, - TargetProperty::WhereTrueFalseNull, - TargetProperty::IndexRangeExcluded, - TargetProperty::BankingTablesMatch, - ], +/// Mutable runtime holding selected property implementations. 
+pub(crate) struct PropertyRuntime { + rules: Vec, +} + +impl PropertyRuntime { + pub fn with_kinds(kinds: &[PropertyKind]) -> Self { + let mut rules: Vec = Vec::with_capacity(kinds.len()); + for kind in kinds { + match kind { + PropertyKind::InsertSelect => rules.push(RuleEntry::new(*kind, Box::::default())), + PropertyKind::DeleteSelect => rules.push(RuleEntry::new(*kind, Box::::default())), + PropertyKind::SelectSelectOptimizer => rules.push(RuleEntry::new(*kind, Box::::default())), + PropertyKind::WhereTrueFalseNull => rules.push(RuleEntry::new(*kind, Box::::default())), + PropertyKind::IndexRangeExcluded => { + rules.push(RuleEntry::new(*kind, Box::::default())) + } + PropertyKind::BankingTablesMatch => rules.push(RuleEntry::new(*kind, Box::::default())), + } } + Self { rules } } -} -impl TargetPropertyState { - fn enabled(&self, property: TargetProperty) -> bool { - self.enabled.contains(&property) + pub fn on_insert( + &mut self, + access: &dyn TargetPropertyAccess, + step: u64, + conn: usize, + table: usize, + row: &SimRow, + in_tx: bool, + ) -> Result<(), String> { + for entry in &mut self.rules { + entry.rule.on_insert(access, step, conn, table, row, in_tx)?; + } + if !in_tx { + for entry in &mut self.rules { + if let Some(every) = entry.periodic_every() && step.is_multiple_of(every) { + entry.rule.on_periodic(access, table)?; + } + } + } + Ok(()) } -} -pub(crate) fn on_insert( - state: &TargetPropertyState, - access: &A, - step: u64, - conn: usize, - table: usize, - row: &SimRow, - in_tx: bool, -) -> Result<(), String> { - if state.enabled(TargetProperty::InsertSelect) { - check_insert_select(access, conn, table, row)?; + pub fn on_delete( + &mut self, + access: &dyn TargetPropertyAccess, + step: u64, + conn: usize, + table: usize, + row: &SimRow, + in_tx: bool, + ) -> Result<(), String> { + for entry in &mut self.rules { + entry.rule.on_delete(access, step, conn, table, row, in_tx)?; + } + if !in_tx { + for entry in &mut self.rules { + if let 
Some(every) = entry.periodic_every() && step.is_multiple_of(every) { + entry.rule.on_periodic(access, table)?; + } + } + } + Ok(()) } - if !in_tx { - maybe_run_periodic(state, access, step, table)?; - if state.enabled(TargetProperty::BankingTablesMatch) { - check_banking_tables_match(access)?; + + pub fn on_commit_or_rollback(&mut self, access: &dyn TargetPropertyAccess) -> Result<(), String> { + for entry in &mut self.rules { + entry.rule.on_commit_or_rollback(access)?; } + Ok(()) } - Ok(()) } -pub(crate) fn on_delete( - state: &TargetPropertyState, - access: &A, - step: u64, - conn: usize, - table: usize, - row: &SimRow, - in_tx: bool, -) -> Result<(), String> { - if state.enabled(TargetProperty::DeleteSelect) { - check_delete_select(access, conn, table, row)?; +struct RuleEntry { + kind: PropertyKind, + rule: Box, +} + +impl RuleEntry { + fn new(kind: PropertyKind, rule: Box) -> Self { + Self { kind, rule } } - if !in_tx { - maybe_run_periodic(state, access, step, table)?; - if state.enabled(TargetProperty::BankingTablesMatch) { - check_banking_tables_match(access)?; + + fn periodic_every(&self) -> Option { + match self.kind { + PropertyKind::SelectSelectOptimizer | PropertyKind::WhereTrueFalseNull => Some(16), + PropertyKind::IndexRangeExcluded => Some(64), + _ => None, } } - Ok(()) } -pub(crate) fn on_commit_or_rollback( - state: &TargetPropertyState, - access: &A, -) -> Result<(), String> { - if state.enabled(TargetProperty::BankingTablesMatch) { - check_banking_tables_match(access)?; +impl Default for PropertyRuntime { + fn default() -> Self { + Self::with_kinds(&[ + PropertyKind::InsertSelect, + PropertyKind::DeleteSelect, + PropertyKind::SelectSelectOptimizer, + PropertyKind::WhereTrueFalseNull, + PropertyKind::IndexRangeExcluded, + PropertyKind::BankingTablesMatch, + ]) } - Ok(()) } -fn maybe_run_periodic( - state: &TargetPropertyState, - access: &A, - step: u64, - table: usize, -) -> Result<(), String> { - if state.periodic_every == 0 || 
!step.is_multiple_of(state.periodic_every) { - return Ok(()); +trait PropertyRule { + fn on_insert( + &mut self, + _access: &dyn TargetPropertyAccess, + _step: u64, + _conn: usize, + _table: usize, + _row: &SimRow, + _in_tx: bool, + ) -> Result<(), String> { + Ok(()) } - if state.enabled(TargetProperty::SelectSelectOptimizer) { - check_norec_select_select_optimizer(access, table)?; + + fn on_delete( + &mut self, + _access: &dyn TargetPropertyAccess, + _step: u64, + _conn: usize, + _table: usize, + _row: &SimRow, + _in_tx: bool, + ) -> Result<(), String> { + Ok(()) } - if state.enabled(TargetProperty::WhereTrueFalseNull) { - check_tlp_partitions(access, table)?; + + fn on_periodic(&mut self, _access: &dyn TargetPropertyAccess, _table: usize) -> Result<(), String> { + Ok(()) } - if state.enabled(TargetProperty::IndexRangeExcluded) { - check_index_range_excluded(access, table)?; + + fn on_commit_or_rollback(&mut self, _access: &dyn TargetPropertyAccess) -> Result<(), String> { + Ok(()) } - Ok(()) } -fn check_insert_select( - access: &A, - conn: usize, - table: usize, - row: &SimRow, -) -> Result<(), String> { - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = access.lookup_in_connection(conn, table, id)?; - if found != Some(row.clone()) { - return Err(format!( - "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={row:?}, actual={found:?}" - )); +#[derive(Default)] +struct InsertSelectRule; + +impl PropertyRule for InsertSelectRule { + fn on_insert( + &mut self, + access: &dyn TargetPropertyAccess, + _step: u64, + conn: usize, + table: usize, + row: &SimRow, + _in_tx: bool, + ) -> Result<(), String> { + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = access.lookup_in_connection(conn, table, id)?; + if found != Some(row.clone()) { + return Err(format!( + "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={row:?}, 
actual={found:?}" + )); + } + Ok(()) } - Ok(()) } -fn check_delete_select( - access: &A, - conn: usize, - table: usize, - row: &SimRow, -) -> Result<(), String> { - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - if access.lookup_in_connection(conn, table, id)?.is_some() { - return Err(format!( - "[DeleteSelect] row still visible after delete on conn={conn}, table={table}, row={row:?}" - )); +#[derive(Default)] +struct DeleteSelectRule; + +impl PropertyRule for DeleteSelectRule { + fn on_delete( + &mut self, + access: &dyn TargetPropertyAccess, + _step: u64, + conn: usize, + table: usize, + row: &SimRow, + _in_tx: bool, + ) -> Result<(), String> { + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + if access.lookup_in_connection(conn, table, id)?.is_some() { + return Err(format!( + "[DeleteSelect] row still visible after delete on conn={conn}, table={table}, row={row:?}" + )); + } + Ok(()) } - Ok(()) } -fn check_norec_select_select_optimizer(access: &A, table: usize) -> Result<(), String> { - let table_plan = access - .schema_plan() - .tables - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - let Some((col_idx, col_ty)) = table_plan - .columns - .iter() - .enumerate() - .skip(1) - .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool | AlgebraicType::U64)) - .map(|(idx, col)| (idx as u16, &col.ty)) - else { - return Ok(()); - }; +#[derive(Default)] +struct NoRecRule; - let scanned_rows = access.collect_rows_for_table(table)?; - if scanned_rows.is_empty() { - return Ok(()); - } +impl PropertyRule for NoRecRule { + fn on_periodic(&mut self, access: &dyn TargetPropertyAccess, table: usize) -> Result<(), String> { + let table_plan = access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some((col_idx, col_ty)) = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool | 
AlgebraicType::U64)) + .map(|(idx, col)| (idx as u16, &col.ty)) + else { + return Ok(()); + }; - let predicate_value = match col_ty { - AlgebraicType::Bool => AlgebraicValue::Bool(true), - AlgebraicType::U64 => scanned_rows[0].values[col_idx as usize].clone(), - _ => return Ok(()), - }; - let where_count = access.count_by_col_eq(table, col_idx, &predicate_value)?; - let projected_true_count = scanned_rows - .iter() - .filter(|row| row.values[col_idx as usize] == predicate_value) - .count(); - if where_count != projected_true_count { - return Err(format!( - "[NoREC::SelectSelectOptimizer] mismatch on table={table}, col={col_idx}: where_count={where_count}, projected_true={projected_true_count}" - )); + let scanned_rows = access.collect_rows_for_table(table)?; + if scanned_rows.is_empty() { + return Ok(()); + } + + let predicate_value = match col_ty { + AlgebraicType::Bool => AlgebraicValue::Bool(true), + AlgebraicType::U64 => scanned_rows[0].values[col_idx as usize].clone(), + _ => return Ok(()), + }; + let where_count = access.count_by_col_eq(table, col_idx, &predicate_value)?; + let projected_true_count = scanned_rows + .iter() + .filter(|row| row.values[col_idx as usize] == predicate_value) + .count(); + if where_count != projected_true_count { + return Err(format!( + "[NoREC::SelectSelectOptimizer] mismatch on table={table}, col={col_idx}: where_count={where_count}, projected_true={projected_true_count}" + )); + } + Ok(()) } - Ok(()) } -fn check_tlp_partitions(access: &A, table: usize) -> Result<(), String> { - let table_plan = access - .schema_plan() - .tables - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - let Some(col_idx) = table_plan - .columns - .iter() - .enumerate() - .skip(1) - .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool)) - .map(|(idx, _)| idx as u16) - else { - return Ok(()); - }; - let total = access.count_rows(table)?; - let true_count = access.count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(true))?; - let 
false_count = access.count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(false))?; - let partition_sum = true_count + false_count; - if partition_sum != total { - return Err(format!( - "[TLP::WhereTrueFalseNull|TLP::UNIONAllPreservesCardinality] partition mismatch on table={table}, col={col_idx}: true={true_count}, false={false_count}, total={total}" - )); +#[derive(Default)] +struct TlpRule; + +impl PropertyRule for TlpRule { + fn on_periodic(&mut self, access: &dyn TargetPropertyAccess, table: usize) -> Result<(), String> { + let table_plan = access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some(col_idx) = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool)) + .map(|(idx, _)| idx as u16) + else { + return Ok(()); + }; + let total = access.count_rows(table)?; + let true_count = access.count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(true))?; + let false_count = access.count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(false))?; + let partition_sum = true_count + false_count; + if partition_sum != total { + return Err(format!( + "[TLP::WhereTrueFalseNull|TLP::UNIONAllPreservesCardinality] partition mismatch on table={table}, col={col_idx}: true={true_count}, false={false_count}, total={total}" + )); + } + Ok(()) } - Ok(()) } -fn check_index_range_excluded(access: &A, table: usize) -> Result<(), String> { - let table_plan = access - .schema_plan() - .tables - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - let rows = access.collect_rows_for_table(table)?; - if rows.len() < 2 { - return Ok(()); +#[derive(Default)] +struct IndexRangeExcludedRule; + +impl PropertyRule for IndexRangeExcludedRule { + fn on_periodic(&mut self, access: &dyn TargetPropertyAccess, table: usize) -> Result<(), String> { + const MAX_ROWS_FOR_INDEX_SCAN_CHECK: usize = 512; + + let table_plan = access + .schema_plan() + .tables + 
.get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let rows = access.collect_rows_for_table(table)?; + if rows.len() < 2 || rows.len() > MAX_ROWS_FOR_INDEX_SCAN_CHECK { + return Ok(()); + } + + for cols in table_plan.extra_indexes.iter().filter(|cols| cols.len() > 1) { + if !cols.iter().all(|&col| { + matches!( + table_plan.columns[col as usize].ty, + AlgebraicType::U64 | AlgebraicType::Bool + ) + }) { + continue; + } + + let mut sorted_rows = rows.clone(); + sorted_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let lower_key = sorted_rows[0].project_key(cols).to_algebraic_value(); + let upper_key = sorted_rows[sorted_rows.len() - 1] + .project_key(cols) + .to_algebraic_value(); + let lower = Bound::Included(lower_key.clone()); + let upper = Bound::Excluded(upper_key.clone()); + + let mut expected_rows = sorted_rows + .into_iter() + .filter(|row| { + let key = row.project_key(cols).to_algebraic_value(); + key >= lower_key && key < upper_key + }) + .collect::>(); + expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let mut actual_rows = access.range_scan(table, cols, lower, upper)?; + actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + if actual_rows != expected_rows { + return Err(format!( + "[PQS::IndexRangeExcluded] range mismatch on table={table}, cols={cols:?}: expected={expected_rows:?}, actual={actual_rows:?}" + )); + } + } + + Ok(()) } +} - for cols in table_plan.extra_indexes.iter().filter(|cols| cols.len() > 1) { - if !cols.iter().all(|&col| { - matches!( - table_plan.columns[col as usize].ty, - AlgebraicType::U64 | AlgebraicType::Bool - ) - }) { - continue; +#[derive(Default)] +struct BankingMatchRule; + +impl PropertyRule for BankingMatchRule { + fn on_insert( + &mut self, + access: &dyn TargetPropertyAccess, + _step: u64, + _conn: usize, + _table: usize, + _row: &SimRow, + in_tx: bool, + ) -> Result<(), String> { + if in_tx { + return Ok(()); } + 
check_banking_tables_match(access) + } - let mut sorted_rows = rows.clone(); - sorted_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - - let lower_key = sorted_rows[0].project_key(cols).to_algebraic_value(); - let upper_key = sorted_rows[sorted_rows.len() - 1] - .project_key(cols) - .to_algebraic_value(); - let lower = Bound::Included(lower_key.clone()); - let upper = Bound::Excluded(upper_key.clone()); - - let mut expected_rows = sorted_rows - .into_iter() - .filter(|row| { - let key = row.project_key(cols).to_algebraic_value(); - key >= lower_key && key < upper_key - }) - .collect::>(); - expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - - let mut actual_rows = access.range_scan(table, cols, lower, upper)?; - actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - - if actual_rows != expected_rows { - return Err(format!( - "[PQS::IndexRangeExcluded] range mismatch on table={table}, cols={cols:?}: expected={expected_rows:?}, actual={actual_rows:?}" - )); + fn on_delete( + &mut self, + access: &dyn TargetPropertyAccess, + _step: u64, + _conn: usize, + _table: usize, + _row: &SimRow, + in_tx: bool, + ) -> Result<(), String> { + if in_tx { + return Ok(()); } + check_banking_tables_match(access) } - Ok(()) + fn on_commit_or_rollback(&mut self, access: &dyn TargetPropertyAccess) -> Result<(), String> { + check_banking_tables_match(access) + } } -fn check_banking_tables_match(access: &A) -> Result<(), String> { +fn check_banking_tables_match(access: &dyn TargetPropertyAccess) -> Result<(), String> { let schema = access.schema_plan(); let debit = schema.tables.iter().position(|table| table.name == "debit_accounts"); let credit = schema.tables.iter().position(|table| table.name == "credit_accounts"); diff --git a/crates/dst/src/targets/relational_db.rs b/crates/dst/src/targets/relational_db.rs index 864196e3df8..d948e659806 100644 --- a/crates/dst/src/targets/relational_db.rs +++ 
b/crates/dst/src/targets/relational_db.rs @@ -30,7 +30,7 @@ use crate::{ seed::DstSeed, targets::{ harness::{self, TableTargetHarness}, - properties::{self, TargetPropertyAccess, TargetPropertyState}, + properties::{PropertyRuntime, TargetPropertyAccess}, }, workload::table_ops::{ ConnectionWriteState, TableScenarioId, TableWorkloadEngine, TableWorkloadInteraction, TableWorkloadOutcome, @@ -64,7 +64,7 @@ struct RelationalDbEngine { db: RelationalDB, table_ids: Vec, execution: ConnectionWriteState, - properties: TargetPropertyState, + properties: PropertyRuntime, step: u64, } @@ -77,7 +77,7 @@ impl RelationalDbEngine { db, table_ids, execution: ConnectionWriteState::new(num_connections), - properties: TargetPropertyState::default(), + properties: PropertyRuntime::default(), step: 0, }) } @@ -218,13 +218,13 @@ impl RelationalDbEngine { .map_err(|err| format!("range scan failed: {err}")) } - fn with_property_state( + fn with_property_runtime( &mut self, - f: impl FnOnce(&TargetPropertyState, &Self) -> Result, + f: impl FnOnce(&mut PropertyRuntime, &Self) -> Result, ) -> Result { - let state = std::mem::take(&mut self.properties); - let result = f(&state, self); - self.properties = state; + let mut runtime = std::mem::take(&mut self.properties); + let result = f(&mut runtime, self); + self.properties = runtime; result } } @@ -288,7 +288,9 @@ impl TableWorkloadEngine for RelationalDbEngine { .commit_tx(tx) .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; self.execution.active_writer = None; - self.with_property_state(|state, access| properties::on_commit_or_rollback(state, access))?; + self.with_property_runtime(|runtime, access| { + runtime.on_commit_or_rollback(access) + })?; } RelationalDbInteraction::RollbackTx { conn } => { self.execution.ensure_writer_owner(*conn, "rollback")?; @@ -297,7 +299,9 @@ impl TableWorkloadEngine for RelationalDbEngine { .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; let _ = 
self.db.rollback_mut_tx(tx); self.execution.active_writer = None; - self.with_property_state(|state, access| properties::on_commit_or_rollback(state, access))?; + self.with_property_runtime(|runtime, access| { + runtime.on_commit_or_rollback(access) + })?; } RelationalDbInteraction::Insert { conn, table, row } => { let in_tx = self.execution.tx_by_connection[*conn].is_some(); @@ -308,8 +312,8 @@ impl TableWorkloadEngine for RelationalDbEngine { Ok(()) })?; let step = self.step; - self.with_property_state(|state, access| { - properties::on_insert(state, access, step, *conn, *table, row, in_tx) + self.with_property_runtime(|runtime, access| { + runtime.on_insert(access, step, *conn, *table, row, in_tx) })?; } RelationalDbInteraction::Delete { conn, table, row } => { @@ -322,8 +326,8 @@ impl TableWorkloadEngine for RelationalDbEngine { Ok(()) })?; let step = self.step; - self.with_property_state(|state, access| { - properties::on_delete(state, access, step, *conn, *table, row, in_tx) + self.with_property_runtime(|runtime, access| { + runtime.on_delete(access, step, *conn, *table, row, in_tx) })?; } } diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 54cc26d2ecb..a349272ece8 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -2,6 +2,7 @@ use std::{ collections::{BTreeMap, HashMap}, + ops::Bound, time::Instant, }; @@ -34,6 +35,7 @@ use crate::{ core::NextInteractionSource, schema::{SchemaPlan, SimRow}, seed::{DstRng, DstSeed}, + targets::properties::{PropertyRuntime, TargetPropertyAccess}, workload::{ commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome}, table_ops::{ConnectionWriteState, TableScenario, TableScenarioId, TableWorkloadInteraction}, @@ -104,6 +106,7 @@ struct RelationalDbCommitlogEngine { commitlog: MockCommitlogFs, last_durable_snapshot: DurableSnapshot, pending_snapshot_capture: bool, + properties: 
PropertyRuntime, } type DurableSnapshot = BTreeMap>; @@ -121,6 +124,7 @@ impl RelationalDbCommitlogEngine { commitlog: MockCommitlogFs::new(seed.fork(700)), last_durable_snapshot: BTreeMap::new(), pending_snapshot_capture: false, + properties: PropertyRuntime::default(), }; this.initialize_program().map_err(anyhow::Error::msg)?; this.install_base_schema().map_err(anyhow::Error::msg)?; @@ -222,6 +226,9 @@ impl RelationalDbCommitlogEngine { self.commit_tx_capture(tx, "commit interaction")?; self.execution.active_writer = None; self.capture_pending_snapshot_if_idle()?; + self.with_property_runtime(|runtime, access| { + runtime.on_commit_or_rollback(access) + })?; Ok(()) } TableWorkloadInteraction::RollbackTx { conn } => { @@ -232,6 +239,9 @@ impl RelationalDbCommitlogEngine { let _ = self.db.rollback_mut_tx(tx); self.execution.active_writer = None; self.capture_pending_snapshot_if_idle()?; + self.with_property_runtime(|runtime, access| { + runtime.on_commit_or_rollback(access) + })?; Ok(()) } TableWorkloadInteraction::Insert { conn, table, row } => { @@ -251,7 +261,10 @@ impl RelationalDbCommitlogEngine { if !in_tx { self.sync_and_snapshot(false)?; } - self.check_insert_select(*conn, *table, row) + let step = self.step as u64; + self.with_property_runtime(|runtime, access| { + runtime.on_insert(access, step, *conn, *table, row, in_tx) + }) } TableWorkloadInteraction::Delete { conn, table, row } => { let in_tx = self.execution.tx_by_connection[*conn].is_some(); @@ -269,7 +282,10 @@ impl RelationalDbCommitlogEngine { if !in_tx { self.sync_and_snapshot(false)?; } - self.check_delete_select(*conn, *table, row) + let step = self.step as u64; + self.with_property_runtime(|runtime, access| { + runtime.on_delete(access, step, *conn, *table, row, in_tx) + }) } } } @@ -479,25 +495,58 @@ impl RelationalDbCommitlogEngine { } } - fn check_insert_select(&self, conn: usize, table: usize, row: &SimRow) -> Result<(), String> { - let id = row.id().ok_or_else(|| "row missing id 
column".to_string())?; - let found = self.lookup_base_row(conn, table, id)?; - if found != Some(row.clone()) { - return Err(format!( - "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={row:?}, actual={found:?}" - )); - } - Ok(()) + fn count_rows_for_property(&self, table: usize) -> Result { + let table_id = self.table_id_for_index(table)?; + let tx = self.db.begin_tx(Workload::ForTests); + let total = self + .db + .iter(&tx, table_id) + .map_err(|err| format!("scan failed: {err}"))? + .count(); + let _ = self.db.release_tx(tx); + Ok(total) } - fn check_delete_select(&self, conn: usize, table: usize, row: &SimRow) -> Result<(), String> { - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - if self.lookup_base_row(conn, table, id)?.is_some() { - return Err(format!( - "[DeleteSelect] row still visible after delete on conn={conn}, table={table}, row={row:?}" - )); - } - Ok(()) + fn count_by_col_eq_for_property(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { + let table_id = self.table_id_for_index(table)?; + let tx = self.db.begin_tx(Workload::ForTests); + let total = self + .db + .iter_by_col_eq(&tx, table_id, col, value) + .map_err(|err| format!("predicate query failed: {err}"))? + .count(); + let _ = self.db.release_tx(tx); + Ok(total) + } + + fn range_scan_for_property( + &self, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + let tx = self.db.begin_tx(Workload::ForTests); + let cols = cols.iter().copied().collect::(); + let rows = self + .db + .iter_by_col_range(&tx, table_id, cols, (lower, upper)) + .map_err(|err| format!("range scan failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + let _ = self.db.release_tx(tx); + Ok(rows) + } + + fn with_property_runtime( + &mut self, + f: impl FnOnce(&mut PropertyRuntime, &Self) -> Result, + ) -> Result { + let mut runtime = std::mem::take(&mut self.properties); + let result = f(&mut runtime, self); + self.properties = runtime; + result } fn collect_rows_by_id(&self, table_id: TableId) -> Result, String> { @@ -558,6 +607,39 @@ impl RelationalDbCommitlogEngine { } } +impl TargetPropertyAccess for RelationalDbCommitlogEngine { + fn schema_plan(&self) -> &SchemaPlan { + &self.base_schema + } + + fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String> { + Self::lookup_base_row(self, conn, table, id) + } + + fn collect_rows_for_table(&self, table: usize) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + Self::collect_rows_by_id(self, table_id) + } + + fn count_rows(&self, table: usize) -> Result { + Self::count_rows_for_property(self, table) + } + + fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { + Self::count_by_col_eq_for_property(self, table, col, value) + } + + fn range_scan( + &self, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String> { + Self::range_scan_for_property(self, table, cols, lower, upper) + } +} + fn reopen_from_history(history: MockHistory) -> Result { debug!("reopen relational db from mocked durable history"); let (db, connected_clients) = RelationalDB::open( From 3d96cf20ee33417400450809aea7e699d87fad2a Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 24 Apr 2026 15:49:32 +0530 Subject: [PATCH 13/74] open close durability --- crates/dst/Cargo.toml | 2 + crates/dst/README.md | 14 +- crates/dst/src/main.rs | 6 +- crates/dst/src/schema.rs | 67 ++- crates/dst/src/targets/descriptor.rs | 19 - crates/dst/src/targets/mod.rs | 1 - crates/dst/src/targets/relational_db.rs | 438 
------------------ .../src/targets/relational_db_commitlog.rs | 392 +++++++--------- .../src/workload/commitlog_ops/generation.rs | 3 + .../dst/src/workload/commitlog_ops/types.rs | 2 + 10 files changed, 248 insertions(+), 696 deletions(-) delete mode 100644 crates/dst/src/targets/relational_db.rs diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index d3672c78f4b..ae7693a30a5 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -19,12 +19,14 @@ anyhow.workspace = true clap.workspace = true serde.workspace = true serde_json.workspace = true +tokio = { version = "0.2.30", package = "madsim-tokio", features = ["full"] } spacetimedb-datastore = { workspace = true, features = ["test"] } spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.1.0" } spacetimedb-commitlog.workspace = true spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.1.0" } spacetimedb-execution.workspace = true spacetimedb-lib.workspace = true +spacetimedb-paths.workspace = true spacetimedb-primitives.workspace = true spacetimedb-sats.workspace = true spacetimedb-schema = { workspace = true, features = ["test"] } diff --git a/crates/dst/README.md b/crates/dst/README.md index 768bf7fea21..9051a86436e 100644 --- a/crates/dst/README.md +++ b/crates/dst/README.md @@ -42,7 +42,7 @@ shared workload generators, and concrete DST targets. shared table-style workload split into scenarios, generation, model, and properties - `targets/`: - `datastore.rs`, `relational_db.rs` + `datastore.rs`, `relational_db_commitlog.rs` - binary: `src/main.rs` @@ -55,7 +55,7 @@ If you are new to the crate, this order keeps the mental model small: 3. `seed.rs` 4. `workload/table_ops/` 5. `targets/datastore.rs` -6. `targets/relational_db.rs` +6. `targets/relational_db_commitlog.rs` ## Core Model @@ -88,7 +88,7 @@ The main reusable DST workload now lives in `workload/table_ops/`: 5. 
`runner.rs` generic execute/run helpers shared by multiple targets -Concrete targets like `targets/datastore.rs` and `targets/relational_db.rs` +Concrete targets like `targets/datastore.rs` and `targets/relational_db_commitlog.rs` reuse that workload and swap in target-specific engines and target-owned properties. @@ -127,7 +127,7 @@ Core commands: ```bash cargo run -p spacetimedb-dst -- run --target datastore --scenario banking --duration 5m cargo run -p spacetimedb-dst -- run --target datastore --scenario indexed-ranges --duration 5m -cargo run -p spacetimedb-dst -- run --target relational-db --seed 42 --max-interactions 2000 +cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --seed 42 --max-interactions 2000 cargo run -p spacetimedb-dst -- replay --target datastore bug.json cargo run -p spacetimedb-dst -- shrink --target datastore bug.json ``` @@ -163,7 +163,7 @@ In that case: - `shrink_failure` 5. add the target to the CLI `TargetKind` -This is the path `datastore` and `relational_db` use today. +This is the path `datastore` and `relational_db_commitlog` use today. ### 2. Add A New Workload Family @@ -270,6 +270,6 @@ single-engine shape used by current table targets. ## Current Scope -This crate provides shared table workload generation, two concrete targets -(`datastore` and `relational_db`), and a small CLI for seeded or +This crate provides shared table workload generation, concrete targets +(`datastore` and `relational_db_commitlog`), and a small CLI for seeded or duration-bounded runs. 
diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index b23b49c89ac..ba5e65e7cf8 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -4,9 +4,7 @@ use clap::{Args, Parser, Subcommand, ValueEnum}; use spacetimedb_dst::{ config::RunConfig, seed::DstSeed, - targets::descriptor::{ - DatastoreDescriptor, RelationalDbCommitlogDescriptor, RelationalDbDescriptor, TargetDescriptor, - }, + targets::descriptor::{DatastoreDescriptor, RelationalDbCommitlogDescriptor, TargetDescriptor}, workload::table_ops::TableScenarioId, }; @@ -46,7 +44,6 @@ struct RunArgs { #[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] enum TargetKind { Datastore, - RelationalDb, RelationalDbCommitlog, } @@ -94,7 +91,6 @@ fn run_command(args: RunArgs) -> anyhow::Result<()> { match args.target.target { TargetKind::Datastore => run_target::(seed, scenario, config), - TargetKind::RelationalDb => run_target::(seed, scenario, config), TargetKind::RelationalDbCommitlog => run_target::(seed, scenario, config), } } diff --git a/crates/dst/src/schema.rs b/crates/dst/src/schema.rs index ab12f834bf5..1a3dd693bd3 100644 --- a/crates/dst/src/schema.rs +++ b/crates/dst/src/schema.rs @@ -44,24 +44,57 @@ pub struct SimRow { #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] enum SerdeAlgebraicValue { + Bool(bool), + I8(i8), + U8(u8), + I16(i16), + U16(u16), + I32(i32), + U32(u32), + I64(i64), U64(u64), + I128(i128), + U128(u128), String(String), - Bool(bool), } pub fn generate_supported_type(rng: &mut DstRng) -> AlgebraicType { - match rng.index(3) { - 0 => AlgebraicType::U64, - 1 => AlgebraicType::String, - _ => AlgebraicType::Bool, + match rng.index(12) { + 0 => AlgebraicType::Bool, + 1 => AlgebraicType::I8, + 2 => AlgebraicType::U8, + 3 => AlgebraicType::I16, + 4 => AlgebraicType::U16, + 5 => AlgebraicType::I32, + 6 => AlgebraicType::U32, + 7 => AlgebraicType::I64, + 8 => AlgebraicType::U64, + 9 => AlgebraicType::I128, + 10 => AlgebraicType::U128, + _ => 
AlgebraicType::String, } } pub fn generate_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(rng.index(2) == 0), + AlgebraicType::I8 => AlgebraicValue::I8(((rng.next_u64() % 64) as i8) - 32), + AlgebraicType::U8 => AlgebraicValue::U8((rng.next_u64() % u8::MAX as u64) as u8), + AlgebraicType::I16 => AlgebraicValue::I16(((rng.next_u64() % 2048) as i16) - 1024), + AlgebraicType::U16 => AlgebraicValue::U16((rng.next_u64() % u16::MAX as u64) as u16), + AlgebraicType::I32 => AlgebraicValue::I32(((rng.next_u64() % 200_000) as i32) - 100_000), + AlgebraicType::U32 => AlgebraicValue::U32((rng.next_u64() % 1_000_000) as u32), + AlgebraicType::I64 => AlgebraicValue::I64(((rng.next_u64() % 2_000_000) as i64) - 1_000_000), AlgebraicType::U64 => AlgebraicValue::U64((rng.next_u64() % 1000) + idx as u64), + AlgebraicType::I128 => { + let v = ((rng.next_u64() % 2_000_000) as i128) - 1_000_000; + AlgebraicValue::I128(v.into()) + } + AlgebraicType::U128 => { + let v = (rng.next_u64() % 2_000_000) as u128; + AlgebraicValue::U128(v.into()) + } AlgebraicType::String => AlgebraicValue::String(format!("v{}_{}", idx, rng.next_u64() % 10_000).into()), - AlgebraicType::Bool => AlgebraicValue::Bool(rng.index(2) == 0), other => panic!("unsupported generated column type: {other:?}"), } } @@ -69,9 +102,18 @@ pub fn generate_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) impl From<&AlgebraicValue> for SerdeAlgebraicValue { fn from(value: &AlgebraicValue) -> Self { match value { + AlgebraicValue::Bool(value) => Self::Bool(*value), + AlgebraicValue::I8(value) => Self::I8(*value), + AlgebraicValue::U8(value) => Self::U8(*value), + AlgebraicValue::I16(value) => Self::I16(*value), + AlgebraicValue::U16(value) => Self::U16(*value), + AlgebraicValue::I32(value) => Self::I32(*value), + AlgebraicValue::U32(value) => Self::U32(*value), + AlgebraicValue::I64(value) => Self::I64(*value), 
AlgebraicValue::U64(value) => Self::U64(*value), + AlgebraicValue::I128(value) => Self::I128(value.0), + AlgebraicValue::U128(value) => Self::U128(value.0), AlgebraicValue::String(value) => Self::String(value.to_string()), - AlgebraicValue::Bool(value) => Self::Bool(*value), other => panic!("unsupported value in simulator row serde: {other:?}"), } } @@ -80,9 +122,18 @@ impl From<&AlgebraicValue> for SerdeAlgebraicValue { impl From for AlgebraicValue { fn from(value: SerdeAlgebraicValue) -> Self { match value { + SerdeAlgebraicValue::Bool(value) => Self::Bool(value), + SerdeAlgebraicValue::I8(value) => Self::I8(value), + SerdeAlgebraicValue::U8(value) => Self::U8(value), + SerdeAlgebraicValue::I16(value) => Self::I16(value), + SerdeAlgebraicValue::U16(value) => Self::U16(value), + SerdeAlgebraicValue::I32(value) => Self::I32(value), + SerdeAlgebraicValue::U32(value) => Self::U32(value), + SerdeAlgebraicValue::I64(value) => Self::I64(value), SerdeAlgebraicValue::U64(value) => Self::U64(value), + SerdeAlgebraicValue::I128(value) => Self::I128(value.into()), + SerdeAlgebraicValue::U128(value) => Self::U128(value.into()), SerdeAlgebraicValue::String(value) => Self::String(value.into()), - SerdeAlgebraicValue::Bool(value) => Self::Bool(value), } } } diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index cb0cd315cb5..3eab5e82b4b 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -29,25 +29,6 @@ impl TargetDescriptor for DatastoreDescriptor { } -pub struct RelationalDbDescriptor; - -impl TargetDescriptor for RelationalDbDescriptor { - const NAME: &'static str = "relational_db"; - type Scenario = TableScenarioId; - - fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> anyhow::Result { - let outcome = crate::targets::relational_db::run_generated_with_config_and_scenario(seed, scenario, config)?; - Ok(format!( - "ok target={} seed={} tables={} row_counts={:?}", - 
Self::NAME, - seed.0, - outcome.final_rows.len(), - outcome.final_row_counts - )) - } - -} - pub struct RelationalDbCommitlogDescriptor; impl TargetDescriptor for RelationalDbCommitlogDescriptor { diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index 67376f8bbf9..ed5386b4fc9 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -4,5 +4,4 @@ pub mod datastore; pub mod descriptor; pub(crate) mod harness; pub(crate) mod properties; -pub mod relational_db; pub mod relational_db_commitlog; diff --git a/crates/dst/src/targets/relational_db.rs b/crates/dst/src/targets/relational_db.rs deleted file mode 100644 index d948e659806..00000000000 --- a/crates/dst/src/targets/relational_db.rs +++ /dev/null @@ -1,438 +0,0 @@ -//! Basic RelationalDB simulator target using the shared table workload. - -use std::ops::Bound; - -use spacetimedb_core::{ - db::relational_db::{MutTx as RelMutTx, RelationalDB}, - messages::control_db::HostType, -}; -use spacetimedb_datastore::{ - execution_context::Workload, - traits::{IsolationLevel, Program}, -}; -use spacetimedb_durability::EmptyHistory; -use spacetimedb_lib::{ - db::auth::{StAccess, StTableType}, - Identity, -}; -use spacetimedb_primitives::TableId; -use spacetimedb_sats::AlgebraicValue; -use spacetimedb_schema::{ - def::BTreeAlgorithm, - schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, - table_name::TableName, -}; -use spacetimedb_table::page_pool::PagePool; - -use crate::{ - config::RunConfig, - schema::{SchemaPlan, SimRow}, - seed::DstSeed, - targets::{ - harness::{self, TableTargetHarness}, - properties::{PropertyRuntime, TargetPropertyAccess}, - }, - workload::table_ops::{ - ConnectionWriteState, TableScenarioId, TableWorkloadEngine, TableWorkloadInteraction, TableWorkloadOutcome, - }, -}; - -pub type RelationalDbSimulatorOutcome = TableWorkloadOutcome; -type RelationalDbInteraction = TableWorkloadInteraction; - -struct RelationalDbTarget; - -impl 
TableTargetHarness for RelationalDbTarget { - type Engine = RelationalDbEngine; - - fn build_engine(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { - RelationalDbEngine::new(schema, num_connections) - } -} - -pub fn run_generated_with_config_and_scenario( - seed: DstSeed, - scenario: TableScenarioId, - config: RunConfig, -) -> anyhow::Result { - harness::run_generated_with_config_and_scenario::(seed, scenario, config) -} - -/// Concrete `RelationalDB` execution harness for the shared table workload. -struct RelationalDbEngine { - schema: SchemaPlan, - db: RelationalDB, - table_ids: Vec, - execution: ConnectionWriteState, - properties: PropertyRuntime, - step: u64, -} - -impl RelationalDbEngine { - fn new(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { - let db = bootstrap_relational_db()?; - let table_ids = install_schema(&db, schema)?; - Ok(Self { - schema: schema.clone(), - db, - table_ids, - execution: ConnectionWriteState::new(num_connections), - properties: PropertyRuntime::default(), - step: 0, - }) - } - - fn with_mut_tx( - &mut self, - conn: usize, - table: usize, - mut f: impl FnMut(&RelationalDB, TableId, &mut RelMutTx) -> Result<(), String>, - ) -> Result<(), String> { - let table_id = *self - .table_ids - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - self.execution.ensure_known_connection(conn)?; - let slot = &mut self.execution.tx_by_connection[conn]; - - match slot { - Some(tx) => f(&self.db, table_id, tx), - None => { - if let Some(owner) = self.execution.active_writer { - return Err(format!( - "connection {conn} cannot auto-commit write while connection {owner} owns lock" - )); - } - let mut tx = self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); - self.execution.active_writer = Some(conn); - f(&self.db, table_id, &mut tx)?; - self.db - .commit_tx(tx) - .map_err(|err| format!("auto-commit failed on connection {conn}: {err}"))?; - self.execution.active_writer = None; - 
Ok(()) - } - } - } - - fn fresh_lookup(&self, table_id: TableId, id: u64) -> anyhow::Result> { - let tx = self.db.begin_tx(Workload::ForTests); - let result = self - .db - .iter_by_col_eq(&tx, table_id, 0u16, &AlgebraicValue::U64(id))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .find(|row| row.id() == Some(id)); - let _ = self.db.release_tx(tx); - Ok(result) - } - - fn collect_rows_for_table(&self, table: usize) -> anyhow::Result> { - let table_id = *self - .table_ids - .get(table) - .ok_or_else(|| anyhow::anyhow!("table {table} out of range"))?; - let tx = self.db.begin_tx(Workload::ForTests); - let mut rows = self - .db - .iter(&tx, table_id)? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - let _ = self.db.release_tx(tx); - rows.sort_by_key(|row| row.id().unwrap_or_default()); - Ok(rows) - } - - fn fresh_range_scan( - &self, - table_id: TableId, - cols: &[u16], - lower: Bound, - upper: Bound, - ) -> anyhow::Result> { - let tx = self.db.begin_tx(Workload::ForTests); - let cols = cols.iter().copied().collect::(); - let rows = self - .db - .iter_by_col_range(&tx, table_id, cols, (lower, upper))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect(); - let _ = self.db.release_tx(tx); - Ok(rows) - } - - fn table_id(&self, table: usize) -> Result { - self.table_ids - .get(table) - .copied() - .ok_or_else(|| format!("table {table} out of range")) - } - - fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String> { - let table_id = self.table_id(table)?; - if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { - Ok(self - .db - .iter_by_col_eq_mut(tx, table_id, 0u16, &AlgebraicValue::U64(id)) - .map_err(|err| format!("in-tx lookup failed: {err}"))? 
- .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .next()) - } else { - self.fresh_lookup(table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}")) - } - } - - fn count_rows_for_property(&self, table: usize) -> Result { - let table_id = self.table_id(table)?; - let tx = self.db.begin_tx(Workload::ForTests); - let total = self - .db - .iter(&tx, table_id) - .map_err(|err| format!("scan failed: {err}"))? - .count(); - let _ = self.db.release_tx(tx); - Ok(total) - } - - fn count_by_col_eq_for_property(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { - let table_id = self.table_id(table)?; - let tx = self.db.begin_tx(Workload::ForTests); - let total = self - .db - .iter_by_col_eq(&tx, table_id, col, value) - .map_err(|err| format!("predicate query failed: {err}"))? - .count(); - let _ = self.db.release_tx(tx); - Ok(total) - } - - fn range_scan_for_property( - &self, - table: usize, - cols: &[u16], - lower: Bound, - upper: Bound, - ) -> Result, String> { - let table_id = self.table_id(table)?; - self.fresh_range_scan(table_id, cols, lower, upper) - .map_err(|err| format!("range scan failed: {err}")) - } - - fn with_property_runtime( - &mut self, - f: impl FnOnce(&mut PropertyRuntime, &Self) -> Result, - ) -> Result { - let mut runtime = std::mem::take(&mut self.properties); - let result = f(&mut runtime, self); - self.properties = runtime; - result - } -} - -impl TargetPropertyAccess for RelationalDbEngine { - fn schema_plan(&self) -> &SchemaPlan { - &self.schema - } - - fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String> { - Self::lookup_in_connection(self, conn, table, id) - } - - fn collect_rows_for_table(&self, table: usize) -> Result, String> { - Self::collect_rows_for_table(self, table).map_err(|err| format!("collect rows failed: {err}")) - } - - fn count_rows(&self, table: usize) -> Result { - Self::count_rows_for_property(self, table) - } - - fn count_by_col_eq(&self, table: 
usize, col: u16, value: &AlgebraicValue) -> Result { - Self::count_by_col_eq_for_property(self, table, col, value) - } - - fn range_scan( - &self, - table: usize, - cols: &[u16], - lower: Bound, - upper: Bound, - ) -> Result, String> { - Self::range_scan_for_property(self, table, cols, lower, upper) - } -} - -impl TableWorkloadEngine for RelationalDbEngine { - fn execute(&mut self, interaction: &RelationalDbInteraction) -> Result<(), String> { - self.step = self.step.saturating_add(1); - match interaction { - RelationalDbInteraction::BeginTx { conn } => { - self.execution.ensure_known_connection(*conn)?; - if self.execution.tx_by_connection[*conn].is_some() { - return Err(format!("connection {conn} already has open transaction")); - } - if let Some(owner) = self.execution.active_writer { - return Err(format!( - "connection {conn} cannot begin write transaction while connection {owner} owns lock" - )); - } - self.execution.tx_by_connection[*conn] = - Some(self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests)); - self.execution.active_writer = Some(*conn); - } - RelationalDbInteraction::CommitTx { conn } => { - self.execution.ensure_writer_owner(*conn, "commit")?; - let tx = self.execution.tx_by_connection[*conn] - .take() - .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; - self.db - .commit_tx(tx) - .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; - self.execution.active_writer = None; - self.with_property_runtime(|runtime, access| { - runtime.on_commit_or_rollback(access) - })?; - } - RelationalDbInteraction::RollbackTx { conn } => { - self.execution.ensure_writer_owner(*conn, "rollback")?; - let tx = self.execution.tx_by_connection[*conn] - .take() - .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; - let _ = self.db.rollback_mut_tx(tx); - self.execution.active_writer = None; - self.with_property_runtime(|runtime, access| { - runtime.on_commit_or_rollback(access) - })?; 
- } - RelationalDbInteraction::Insert { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[*conn].is_some(); - self.with_mut_tx(*conn, *table, |db, table_id, tx| { - let bsatn = row.to_bsatn().map_err(|err: anyhow::Error| err.to_string())?; - db.insert(tx, table_id, &bsatn) - .map_err(|err| format!("insert failed: {err}"))?; - Ok(()) - })?; - let step = self.step; - self.with_property_runtime(|runtime, access| { - runtime.on_insert(access, step, *conn, *table, row, in_tx) - })?; - } - RelationalDbInteraction::Delete { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[*conn].is_some(); - self.with_mut_tx(*conn, *table, |db, table_id, tx| { - let deleted = db.delete_by_rel(tx, table_id, [row.to_product_value()]); - if deleted != 1 { - return Err(format!("delete expected 1 row, got {deleted}")); - } - Ok(()) - })?; - let step = self.step; - self.with_property_runtime(|runtime, access| { - runtime.on_delete(access, step, *conn, *table, row, in_tx) - })?; - } - } - - Ok(()) - } - - fn collect_outcome(&mut self) -> anyhow::Result { - let tx = self.db.begin_tx(Workload::ForTests); - let mut final_rows = Vec::with_capacity(self.table_ids.len()); - let mut final_row_counts = Vec::with_capacity(self.table_ids.len()); - - for &table_id in &self.table_ids { - let mut rows = self - .db - .iter(&tx, table_id)? 
- .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); - final_row_counts.push(rows.len() as u64); - final_rows.push(rows); - } - let _ = self.db.release_tx(tx); - - Ok(RelationalDbSimulatorOutcome { - final_row_counts, - final_rows, - }) - } - - fn finish(&mut self) { - for tx in &mut self.execution.tx_by_connection { - if let Some(tx) = tx.take() { - let _ = self.db.rollback_mut_tx(tx); - } - } - self.execution.active_writer = None; - } -} - -fn bootstrap_relational_db() -> anyhow::Result { - let (db, connected_clients) = RelationalDB::open( - Identity::ZERO, - Identity::ZERO, - EmptyHistory::new(), - None, - None, - PagePool::new_for_test(), - )?; - assert_eq!(connected_clients.len(), 0); - db.with_auto_commit(Workload::Internal, |tx| { - db.set_initialized(tx, Program::empty(HostType::Wasm.into())) - })?; - Ok(db) -} - -fn install_schema(db: &RelationalDB, schema: &SchemaPlan) -> anyhow::Result> { - let mut tx = db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); - let mut table_ids = Vec::with_capacity(schema.tables.len()); - - for table in &schema.tables { - let columns = table - .columns - .iter() - .enumerate() - .map(|(idx, col)| ColumnSchema::for_test(idx as u16, &col.name, col.ty.clone())) - .collect::>(); - - let mut indexes = vec![IndexSchema::for_test( - format!("{}_id_idx", table.name), - BTreeAlgorithm::from(0), - )]; - for cols in &table.extra_indexes { - let cols_name = cols.iter().map(|col| format!("c{col}")).collect::>().join("_"); - indexes.push(IndexSchema::for_test( - format!("{}_{}_idx", table.name, cols_name), - BTreeAlgorithm::from(cols.iter().copied().collect::()), - )); - } - let constraints = vec![ConstraintSchema::unique_for_test( - format!("{}_id_unique", table.name), - 0, - )]; - - let table_id = db.create_table( - &mut tx, - TableSchema::new( - TableId::SENTINEL, - TableName::for_test(&table.name), - None, - columns, - indexes, 
- constraints, - vec![], - StTableType::User, - StAccess::Public, - None, - Some(0.into()), - false, - None, - ), - )?; - table_ids.push(table_id); - } - - db.commit_tx(tx)?; - Ok(table_ids) -} diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index a349272ece8..452ee8837db 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -3,38 +3,39 @@ use std::{ collections::{BTreeMap, HashMap}, ops::Bound, - time::Instant, + sync::Arc, + time::{Instant, SystemTime, UNIX_EPOCH}, }; -use spacetimedb_commitlog::{self as commitlog, error::Traversal}; use spacetimedb_core::{ - db::relational_db::{MutTx as RelMutTx, RelationalDB, Txdata}, + db::relational_db::{MutTx as RelMutTx, Persistence, RelationalDB, Txdata}, messages::control_db::HostType, }; use spacetimedb_datastore::{ execution_context::Workload, - traits::{IsolationLevel, Program, TxData as DatastoreTxData}, + traits::{IsolationLevel, Program}, }; -use spacetimedb_durability::{EmptyHistory, History, TxOffset}; +use spacetimedb_durability::{Durability, EmptyHistory, History}; use spacetimedb_lib::{ db::auth::{StAccess, StTableType}, Identity, }; +use spacetimedb_paths::{server::ReplicaDir, FromPathUnchecked}; use spacetimedb_primitives::TableId; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; +use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; use spacetimedb_schema::{ def::BTreeAlgorithm, schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, table_name::TableName, }; use spacetimedb_table::page_pool::PagePool; -use tracing::{debug, info, trace, warn}; +use tracing::{debug, info, trace}; use crate::{ config::RunConfig, core::NextInteractionSource, schema::{SchemaPlan, SimRow}, - seed::{DstRng, DstSeed}, + seed::DstSeed, targets::properties::{PropertyRuntime, TargetPropertyAccess}, workload::{ commitlog_ops::{CommitlogInteraction, 
CommitlogWorkloadOutcome}, @@ -103,17 +104,20 @@ struct RelationalDbCommitlogEngine { base_table_ids: Vec, dynamic_tables: HashMap, step: usize, - commitlog: MockCommitlogFs, + durability: Arc>, + last_observed_durable_offset: Option, last_durable_snapshot: DurableSnapshot, pending_snapshot_capture: bool, properties: PropertyRuntime, + runtime_handle: tokio::runtime::Handle, + _runtime_guard: Option, } type DurableSnapshot = BTreeMap>; impl RelationalDbCommitlogEngine { fn new(seed: DstSeed, schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { - let db = bootstrap_relational_db()?; + let (db, durability, runtime_handle, runtime_guard) = bootstrap_relational_db(seed.fork(700))?; let mut this = Self { db, execution: ConnectionWriteState::new(num_connections), @@ -121,24 +125,18 @@ impl RelationalDbCommitlogEngine { base_table_ids: Vec::with_capacity(schema.tables.len()), dynamic_tables: HashMap::new(), step: 0, - commitlog: MockCommitlogFs::new(seed.fork(700)), + durability, + last_observed_durable_offset: None, last_durable_snapshot: BTreeMap::new(), pending_snapshot_capture: false, properties: PropertyRuntime::default(), + runtime_handle, + _runtime_guard: runtime_guard, }; - this.initialize_program().map_err(anyhow::Error::msg)?; this.install_base_schema().map_err(anyhow::Error::msg)?; Ok(this) } - fn initialize_program(&mut self) -> Result<(), String> { - let mut tx = self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::Internal); - self.db - .set_initialized(&mut tx, Program::empty(HostType::Wasm.into())) - .map_err(|err| format!("set_initialized failed: {err}"))?; - self.commit_tx_capture(tx, "initialize") - } - fn install_base_schema(&mut self) -> Result<(), String> { let mut tx = self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); for table in &self.base_schema.tables { @@ -186,7 +184,10 @@ impl RelationalDbCommitlogEngine { .map_err(|err| format!("create table '{}' failed: {err}", table.name))?; 
self.base_table_ids.push(table_id); } - self.commit_tx_capture(tx, "install base schema") + self.db + .commit_tx(tx) + .map(|_| ()) + .map_err(|err| format!("install base schema commit failed: {err}")) } fn execute(&mut self, interaction: &CommitlogInteraction) -> Result<(), String> { @@ -197,7 +198,87 @@ impl RelationalDbCommitlogEngine { CommitlogInteraction::DropDynamicTable { conn, slot } => self.drop_dynamic_table(*conn, *slot), CommitlogInteraction::MigrateDynamicTable { conn, slot } => self.migrate_dynamic_table(*conn, *slot), CommitlogInteraction::ChaosSync => self.sync_and_snapshot(true), + CommitlogInteraction::CloseReopen => self.close_and_reopen(), + } + } + + fn close_and_reopen(&mut self) -> Result<(), String> { + if self.execution.active_writer.is_some() + || self.execution.tx_by_connection.iter().any(|tx| tx.is_some()) + { + trace!("skip close/reopen while transaction is open"); + return Ok(()); + } + + self.sync_and_snapshot(true)?; + let history = self.durability.as_history(); + let persistence = Persistence { + durability: self.durability.clone(), + disk_size: Arc::new({ + let durability = self.durability.clone(); + move || durability.size_on_disk() + }), + snapshots: None, + runtime: self.runtime_handle.clone(), + }; + let (db, connected_clients) = RelationalDB::open( + Identity::ZERO, + Identity::ZERO, + history, + Some(persistence), + None, + PagePool::new_for_test(), + ) + .map_err(|err| format!("close/reopen failed: {err}"))?; + if !connected_clients.is_empty() { + return Err(format!( + "unexpected connected clients after reopen: {connected_clients:?}" + )); + } + self.db = db; + self.rebuild_table_handles_after_reopen()?; + self.capture_pending_snapshot_if_idle()?; + debug!( + base_tables = self.base_table_ids.len(), + dynamic_tables = self.dynamic_tables.len(), + "reopened relational db from durable history" + ); + Ok(()) + } + + fn rebuild_table_handles_after_reopen(&mut self) -> Result<(), String> { + let tx = 
self.db.begin_tx(Workload::ForTests); + let schemas = self + .db + .get_all_tables(&tx) + .map_err(|err| format!("list tables after reopen failed: {err}"))?; + let _ = self.db.release_tx(tx); + + let mut by_name = HashMap::with_capacity(schemas.len()); + for schema in schemas { + by_name.insert(schema.table_name.to_string(), schema.table_id); } + + self.base_table_ids.clear(); + for table in &self.base_schema.tables { + let table_id = by_name + .get(&table.name) + .copied() + .ok_or_else(|| format!("base table '{}' missing after reopen", table.name))?; + self.base_table_ids.push(table_id); + } + + self.dynamic_tables.retain(|slot, state| { + let name = dynamic_table_name(*slot, state.version); + if let Some(table_id) = by_name.get(&name).copied() { + state.table_id = table_id; + true + } else { + false + } + }); + + Ok(()) } fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), String> { @@ -223,7 +304,9 @@ impl RelationalDbCommitlogEngine { let tx = self.execution.tx_by_connection[*conn] .take() .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; - self.commit_tx_capture(tx, "commit interaction")?; + self.db + .commit_tx(tx) + .map_err(|err| format!("commit interaction failed: {err}"))?; self.execution.active_writer = None; self.capture_pending_snapshot_if_idle()?; self.with_property_runtime(|runtime, access| { @@ -314,7 +397,9 @@ impl RelationalDbCommitlogEngine { let mut tx = self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); self.execution.active_writer = Some(conn); f(self, &mut tx)?; - self.commit_tx_capture(tx, "auto-commit write")?; + self.db + .commit_tx(tx) + .map_err(|err| format!("auto-commit write failed: {err}"))?; self.execution.active_writer = None; self.capture_pending_snapshot_if_idle()?; Ok(()) @@ -412,35 +497,25 @@ impl RelationalDbCommitlogEngine { self.execution.active_writer.unwrap_or(conn) } - fn commit_tx_capture(&mut self, tx: RelMutTx, context: &str) -> Result<(), 
String> { - let committed = self - .db - .commit_tx(tx) - .map_err(|err| format!("{context} commit failed: {err}"))?; - if let Some((offset, tx_data, _, _)) = committed { - let Some(encoded) = encode_txdata_for_commitlog(&tx_data) else { - trace!(step = self.step, context, "commit had no durable payload"); - return Ok(()); - }; - trace!(step = self.step, context, offset, "append tx to mock commitlog"); - self.commitlog - .append(offset, encoded) - .map_err(|err| format!("{context} append to mock commitlog failed: {err}"))?; - } - Ok(()) - } - fn sync_and_snapshot(&mut self, forced: bool) -> Result<(), String> { - let advanced = self - .commitlog - .sync(forced) - .map_err(|err| format!("mock sync failed: {err}"))?; + let current = self + .durability + .durable_tx_offset() + .get() + .map_err(|err| format!("read durable offset failed: {err}"))?; + let advanced = match (self.last_observed_durable_offset, current) { + (None, Some(_)) => true, + (Some(prev), Some(now)) => now > prev, + _ => false, + }; + self.last_observed_durable_offset = current; trace!( step = self.step, forced, advanced, - durable_count = self.commitlog.durable_count(), - "mock sync" + durable_offset = ?current, + queue_depth = self.durability.queue_depth(), + "durability observe" ); if advanced { if self.execution.active_writer.is_some() { @@ -583,16 +658,20 @@ impl RelationalDbCommitlogEngine { fn collect_outcome(&mut self) -> Result { self.capture_pending_snapshot_if_idle()?; self.sync_and_snapshot(true)?; - let history = MockHistory::from_durable(self.commitlog.durable_records())?; + let history = self.durability.as_history(); let replayed = reopen_from_history(history)?; + let durable_commit_count = self + .last_observed_durable_offset + .map(|offset| (offset as usize).saturating_add(1)) + .unwrap_or(0); debug!( - durable_commits = self.commitlog.durable_count(), + durable_commits = durable_commit_count, replay_tables = replayed.len(), "replayed durable prefix" ); 
Ok(RelationalDbCommitlogOutcome { applied_steps: self.step, - durable_commit_count: self.commitlog.durable_count(), + durable_commit_count, replay_table_count: replayed.len(), }) } @@ -640,7 +719,7 @@ impl TargetPropertyAccess for RelationalDbCommitlogEngine { } } -fn reopen_from_history(history: MockHistory) -> Result { +fn reopen_from_history(history: impl History) -> Result { debug!("reopen relational db from mocked durable history"); let (db, connected_clients) = RelationalDB::open( Identity::ZERO, @@ -684,17 +763,58 @@ fn is_user_dst_table(name: &str) -> bool { !name.starts_with("st_") } -fn bootstrap_relational_db() -> anyhow::Result { +fn bootstrap_relational_db( + seed: DstSeed, +) -> anyhow::Result<( + RelationalDB, + Arc>, + tokio::runtime::Handle, + Option, +)> { + let (runtime_handle, runtime_guard) = if let Ok(handle) = tokio::runtime::Handle::try_current() { + (handle, None) + } else { + let runtime = tokio::runtime::Runtime::new()?; + (runtime.handle().clone(), Some(runtime)) + }; + let replica_dir = dst_replica_dir(seed)?; + let durability = Arc::new( + spacetimedb_durability::Local::open(replica_dir, runtime_handle.clone(), Default::default(), None) + .map_err(|err| anyhow::anyhow!("open local durability failed: {err}"))?, + ); + let persistence = Persistence { + durability: durability.clone(), + disk_size: Arc::new({ + let durability = durability.clone(); + move || durability.size_on_disk() + }), + snapshots: None, + runtime: runtime_handle.clone(), + }; let (db, connected_clients) = RelationalDB::open( Identity::ZERO, Identity::ZERO, EmptyHistory::new(), - None, + Some(persistence), None, PagePool::new_for_test(), )?; assert_eq!(connected_clients.len(), 0); - Ok(db) + db.with_auto_commit(Workload::Internal, |tx| { + db.set_initialized(tx, Program::empty(HostType::Wasm.into())) + })?; + Ok((db, durability, runtime_handle, runtime_guard)) +} + +fn dst_replica_dir(seed: DstSeed) -> anyhow::Result { + let nonce = 
SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos(); + let path = std::env::temp_dir().join(format!( + "spacetimedb-dst-relational-db-commitlog-{}-{}-{nonce}", + seed.0, + std::process::id() + )); + std::fs::create_dir_all(&path)?; + Ok(ReplicaDir::from_path_unchecked(path)) } fn dynamic_table_name(slot: u32, version: u32) -> String { @@ -727,167 +847,3 @@ fn dynamic_schema(name: &str, version: u32) -> TableSchema { None, ) } - -fn encode_txdata_for_commitlog(tx_data: &DatastoreTxData) -> Option { - let _tx_offset = tx_data.tx_offset()?; - let mut inserts: Box<_> = tx_data - .persistent_inserts() - .map(|(table_id, rowdata)| commitlog::payload::txdata::Ops { table_id, rowdata }) - .collect(); - inserts.sort_unstable_by_key(|ops| ops.table_id); - - let mut deletes: Box<_> = tx_data - .persistent_deletes() - .map(|(table_id, rowdata)| commitlog::payload::txdata::Ops { table_id, rowdata }) - .collect(); - deletes.sort_unstable_by_key(|ops| ops.table_id); - - let mut truncates: Box<[_]> = tx_data.persistent_truncates().collect(); - truncates.sort_unstable_by_key(|table_id| *table_id); - - Some(Txdata { - inputs: None, - outputs: None, - mutations: Some(commitlog::payload::txdata::Mutations { - inserts, - deletes, - truncates, - }), - }) -} - -/// Deterministic mocked file/commitlog layer with chaos. 
-struct MockCommitlogFs { - chaos_rng: DstRng, - pending: Vec<(u64, Txdata)>, - durable: Vec<(u64, Txdata)>, - commits_since_sync: usize, -} - -impl MockCommitlogFs { - fn new(seed: DstSeed) -> Self { - Self { - chaos_rng: seed.rng(), - pending: Vec::new(), - durable: Vec::new(), - commits_since_sync: 0, - } - } - - fn append(&mut self, tx_offset: u64, txdata: Txdata) -> Result<(), String> { - // deterministic append chaos: low-rate injected write failure - if self.chaos_rng.index(1000) < 6 { - warn!(tx_offset, "mock commitlog injected append error"); - return Err("injected append error".to_string()); - } - if let Some((last_offset, _)) = self.pending.last().or_else(|| self.durable.last()) - && tx_offset != last_offset.saturating_add(1) - { - return Err(format!( - "non-contiguous commitlog append: got={tx_offset} expected={}", - last_offset.saturating_add(1) - )); - } - self.pending.push((tx_offset, txdata)); - self.commits_since_sync = self.commits_since_sync.saturating_add(1); - trace!( - tx_offset, - pending = self.pending.len(), - durable = self.durable.len(), - commits_since_sync = self.commits_since_sync, - "mock commitlog append" - ); - Ok(()) - } - - fn sync(&mut self, forced: bool) -> Result { - if self.pending.is_empty() { - return Ok(false); - } - - // periodic delayed fsync behavior - let should_attempt = forced || self.commits_since_sync >= 3 || self.chaos_rng.index(100) < 30; - if !should_attempt { - trace!( - forced, - pending = self.pending.len(), - commits_since_sync = self.commits_since_sync, - "mock sync skipped (delay)" - ); - return Ok(false); - } - - // injected fsync miss: pretend sync happened but keep data pending - if !forced && self.chaos_rng.index(100) < 12 { - self.commits_since_sync = 0; - warn!( - pending = self.pending.len(), - "mock sync injected miss (no durable advance)" - ); - return Ok(false); - } - - let mut advanced = false; - for pending in self.pending.drain(..) 
{ - self.durable.push(pending); - advanced = true; - } - self.commits_since_sync = 0; - debug!(durable = self.durable.len(), "mock sync advanced durable prefix"); - Ok(advanced) - } - - fn durable_records(&self) -> &[(u64, Txdata)] { - &self.durable - } - - fn durable_count(&self) -> usize { - self.durable.len() - } -} - -/// In-memory history used to replay exactly the durable commitlog prefix. -struct MockHistory(commitlog::commitlog::Generic); - -impl MockHistory { - fn from_durable(records: &[(u64, Txdata)]) -> Result { - let mut log = commitlog::commitlog::Generic::open(commitlog::repo::Memory::unlimited(), Default::default()) - .map_err(|err| format!("open in-memory commitlog failed: {err}"))?; - for (offset, txdata) in records { - log.commit([(*offset, txdata.clone())]) - .map_err(|err| format!("append durable tx offset={offset} failed: {err}"))?; - } - Ok(Self(log)) - } -} - -impl History for MockHistory { - type TxData = Txdata; - - fn fold_transactions_from(&self, offset: TxOffset, decoder: D) -> Result<(), D::Error> - where - D: commitlog::Decoder, - D::Error: From, - { - self.0.fold_transactions_from(offset, decoder) - } - - fn transactions_from<'a, D>( - &self, - offset: TxOffset, - decoder: &'a D, - ) -> impl Iterator, D::Error>> - where - D: commitlog::Decoder, - D::Error: From, - Self::TxData: 'a, - { - self.0.transactions_from(offset, decoder) - } - - fn tx_range_hint(&self) -> (TxOffset, Option) { - let min = self.0.min_committed_offset().unwrap_or_default(); - let max = self.0.max_committed_offset(); - (min, max) - } -} diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs index 61ca39fe1c3..ea79ab8d87d 100644 --- a/crates/dst/src/workload/commitlog_ops/generation.rs +++ b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -53,6 +53,9 @@ impl NextInteractionGeneratorComposite { if self.rng.index(100) < 18 { self.pending.push_back(CommitlogInteraction::ChaosSync); } + if 
self.rng.index(100) < 4 { + self.pending.push_back(CommitlogInteraction::CloseReopen); + } if self.rng.index(100) < 9 { let conn = self.rng.index(self.num_connections); diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs index b473d554049..cb50bedef38 100644 --- a/crates/dst/src/workload/commitlog_ops/types.rs +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -17,6 +17,8 @@ pub enum CommitlogInteraction { MigrateDynamicTable { conn: usize, slot: u32 }, /// Ask the mock commitlog file layer to run a sync attempt. ChaosSync, + /// Close and restart the database from durable history. + CloseReopen, } /// Successful run summary for commitlog target. From 3cac9ef446029d949550aa57e15f904d0b800607 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 24 Apr 2026 19:52:48 +0530 Subject: [PATCH 14/74] typo --- .../src/targets/relational_db_commitlog.rs | 39 +++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 452ee8837db..a28d59af5c7 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -61,7 +61,7 @@ pub fn run_generated_with_config_and_scenario( num_connections, config.max_interactions_or_default(usize::MAX), ); - let mut engine = RelationalDbCommitlogEngine::new(seed, &schema, num_connections)?; + let mut engine = RelationalDbEngine::new(seed, &schema, num_connections)?; let deadline = config.deadline(); let mut step_index = 0usize; @@ -97,7 +97,7 @@ struct DynamicTableState { } /// Engine executing mixed table+lifecycle interactions while recording mocked durable history. 
-struct RelationalDbCommitlogEngine { +struct RelationalDbEngine { db: RelationalDB, execution: ConnectionWriteState, base_schema: SchemaPlan, @@ -110,14 +110,15 @@ struct RelationalDbCommitlogEngine { pending_snapshot_capture: bool, properties: PropertyRuntime, runtime_handle: tokio::runtime::Handle, + replica_dir: ReplicaDir, _runtime_guard: Option, } type DurableSnapshot = BTreeMap>; -impl RelationalDbCommitlogEngine { +impl RelationalDbEngine { fn new(seed: DstSeed, schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { - let (db, durability, runtime_handle, runtime_guard) = bootstrap_relational_db(seed.fork(700))?; + let (db, durability, runtime_handle, replica_dir, runtime_guard) = bootstrap_relational_db(seed.fork(700))?; let mut this = Self { db, execution: ConnectionWriteState::new(num_connections), @@ -131,6 +132,7 @@ impl RelationalDbCommitlogEngine { pending_snapshot_capture: false, properties: PropertyRuntime::default(), runtime_handle, + replica_dir, _runtime_guard: runtime_guard, }; this.install_base_schema().map_err(anyhow::Error::msg)?; @@ -211,11 +213,24 @@ impl RelationalDbCommitlogEngine { } self.sync_and_snapshot(true)?; - let history = self.durability.as_history(); + // In madsim we avoid blocking close here; dropping the close future + // triggers actor abort via durability's close guard. 
+ drop(self.durability.close()); + + let durability = Arc::new( + spacetimedb_durability::Local::open( + self.replica_dir.clone(), + self.runtime_handle.clone(), + Default::default(), + None, + ) + .map_err(|err| format!("reopen local durability failed: {err}"))?, + ); + let persistence = Persistence { - durability: self.durability.clone(), + durability: durability.clone(), disk_size: Arc::new({ - let durability = self.durability.clone(); + let durability = durability.clone(); move || durability.size_on_disk() }), snapshots: None, @@ -224,7 +239,7 @@ impl RelationalDbCommitlogEngine { let (db, connected_clients) = RelationalDB::open( Identity::ZERO, Identity::ZERO, - history, + durability.as_history(), Some(persistence), None, PagePool::new_for_test(), @@ -235,6 +250,7 @@ impl RelationalDbCommitlogEngine { "unexpected connected clients after reopen: {connected_clients:?}" )); } + self.durability = durability; self.db = db; self.rebuild_table_handles_after_reopen()?; self.capture_pending_snapshot_if_idle()?; @@ -686,7 +702,7 @@ impl RelationalDbCommitlogEngine { } } -impl TargetPropertyAccess for RelationalDbCommitlogEngine { +impl TargetPropertyAccess for RelationalDbEngine { fn schema_plan(&self) -> &SchemaPlan { &self.base_schema } @@ -769,6 +785,7 @@ fn bootstrap_relational_db( RelationalDB, Arc>, tokio::runtime::Handle, + ReplicaDir, Option, )> { let (runtime_handle, runtime_guard) = if let Ok(handle) = tokio::runtime::Handle::try_current() { @@ -779,7 +796,7 @@ fn bootstrap_relational_db( }; let replica_dir = dst_replica_dir(seed)?; let durability = Arc::new( - spacetimedb_durability::Local::open(replica_dir, runtime_handle.clone(), Default::default(), None) + spacetimedb_durability::Local::open(replica_dir.clone(), runtime_handle.clone(), Default::default(), None) .map_err(|err| anyhow::anyhow!("open local durability failed: {err}"))?, ); let persistence = Persistence { @@ -803,7 +820,7 @@ fn bootstrap_relational_db( db.with_auto_commit(Workload::Internal, 
|tx| { db.set_initialized(tx, Program::empty(HostType::Wasm.into())) })?; - Ok((db, durability, runtime_handle, runtime_guard)) + Ok((db, durability, runtime_handle, replica_dir, runtime_guard)) } fn dst_replica_dir(seed: DstSeed) -> anyhow::Result { From 0983c62853048fa4162c164f63410da4b2a41737 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 24 Apr 2026 21:28:09 +0530 Subject: [PATCH 15/74] better properties --- Cargo.lock | 201 ++++++++++++++++-- crates/core/Cargo.toml | 3 +- crates/core/src/host/scheduler.rs | 6 +- crates/dst/Cargo.toml | 5 +- crates/dst/src/main.rs | 3 +- .../src/workload/commitlog_ops/generation.rs | 32 ++- crates/dst/src/workload/mod.rs | 1 + crates/dst/src/workload/strategy.rs | 150 +++++++++++++ .../dst/src/workload/table_ops/generation.rs | 66 ++++-- crates/dst/src/workload/table_ops/mod.rs | 1 + .../dst/src/workload/table_ops/strategies.rs | 65 ++++++ crates/durability/Cargo.toml | 4 +- 12 files changed, 480 insertions(+), 57 deletions(-) create mode 100644 crates/dst/src/workload/strategy.rs create mode 100644 crates/dst/src/workload/table_ops/strategies.rs diff --git a/Cargo.lock b/Cargo.lock index cbf00b21508..6ab4f39ad48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -34,7 +34,7 @@ version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.13", "once_cell", "version_check", ] @@ -231,6 +231,18 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "async-channel" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "924ed96dd52d1b75e9c1a3e6275715fd320f5f9439fb5a4a11fa51f4221158d2" +dependencies = [ + "concurrent-queue", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + [[package]] name = "async-scoped" version = "0.9.0" @@ -264,6 +276,12 @@ dependencies = [ "syn 2.0.107", ] +[[package]] +name = 
"async-task" +version = "4.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" + [[package]] name = "async-trait" version = "0.1.89" @@ -1086,6 +1104,15 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "concurrent_lru" version = "0.2.0" @@ -1585,14 +1612,38 @@ dependencies = [ "synstructure 0.12.6", ] +[[package]] +name = "darling" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +dependencies = [ + "darling_core 0.14.4", + "darling_macro 0.14.4", +] + [[package]] name = "darling" version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.21.3", + "darling_macro 0.21.3", +] + +[[package]] +name = "darling_core" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.10.0", + "syn 1.0.109", ] [[package]] @@ -1609,13 +1660,24 @@ dependencies = [ "syn 2.0.107", ] +[[package]] +name = "darling_macro" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +dependencies = [ + "darling_core 0.14.4", + "quote", + "syn 1.0.109", +] + [[package]] name = "darling_macro" version = "0.21.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ - "darling_core", + "darling_core 0.21.3", "quote", "syn 2.0.107", ] @@ -1877,6 +1939,12 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "downcast-rs" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" + [[package]] name = "dragonbox_ecma" version = "0.1.0" @@ -2027,7 +2095,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f43e744e4ea338060faee68ed933e46e722fb7f3617e722a5772d7e856d8b3ce" dependencies = [ - "darling", + "darling 0.21.3", "proc-macro2", "quote", "syn 2.0.107", @@ -2106,6 +2174,27 @@ dependencies = [ "serde", ] +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +dependencies = [ + "event-listener", + "pin-project-lite", +] + [[package]] name = "event-table-client" version = "2.1.0" @@ -2509,9 +2598,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "a06fddc2749e0528d2813f95e050e87e52c8cbbae56223b9babf73b3e53b0cc6" dependencies = [ "cfg-if", "js-sys", @@ -3934,6 +4023,55 @@ 
dependencies = [ "libc", ] +[[package]] +name = "madsim" +version = "0.2.34" +dependencies = [ + "ahash 0.8.12", + "async-channel", + "async-stream", + "async-task", + "bincode", + "bytes", + "downcast-rs", + "errno", + "futures-util", + "lazy_static", + "libc", + "madsim-macros", + "naive-timer", + "panic-message", + "rand 0.8.5", + "rand_xoshiro", + "rustversion", + "serde", + "spin", + "tokio", + "tokio-util", + "toml 0.9.8", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "madsim-macros" +version = "0.2.12" +dependencies = [ + "darling 0.14.4", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "madsim-tokio" +version = "0.2.30" +dependencies = [ + "madsim", + "spin", + "tokio", +] + [[package]] name = "mappings" version = "0.7.1" @@ -4109,6 +4247,12 @@ dependencies = [ "syn 2.0.107", ] +[[package]] +name = "naive-timer" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "034a0ad7deebf0c2abcf2435950a6666c3c15ea9d8fad0c0f48efa8a7f843fed" + [[package]] name = "names" version = "0.14.0" @@ -5132,6 +5276,12 @@ dependencies = [ "rustc-hash", ] +[[package]] +name = "panic-message" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384e52fd8fbd4cbe3c317e8216260c21a0f9134de108cea8a4dd4e7e152c472d" + [[package]] name = "papaya" version = "0.2.3" @@ -5153,6 +5303,12 @@ dependencies = [ "unicode-width 0.1.14", ] +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + [[package]] name = "parking_lot" version = "0.11.2" @@ -6065,7 +6221,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.13", ] [[package]] @@ -6086,6 +6242,15 @@ dependencies = 
[ "rand_core 0.9.3", ] +[[package]] +name = "rand_xoshiro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" +dependencies = [ + "rand_core 0.6.4", +] + [[package]] name = "rayon" version = "1.11.0" @@ -6139,7 +6304,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.13", "libredox", "thiserror 1.0.69", ] @@ -6368,7 +6533,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.13", "libc", "untrusted", "windows-sys 0.52.0", @@ -7308,7 +7473,7 @@ version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7e6c180db0816026a61afa1cff5344fb7ebded7e4d3062772179f2501481c27" dependencies = [ - "darling", + "darling 0.21.3", "proc-macro2", "quote", "syn 2.0.107", @@ -7593,7 +7758,7 @@ checksum = "db18cb19c7499ba4a65b1504442179a7e4aba487dc35978d90966c5ca02ee16b" dependencies = [ "bytemuck", "derive_more 0.99.20", - "getrandom 0.2.16", + "getrandom 0.2.13", "log", "rand 0.8.5", "scoped-tls", @@ -7612,7 +7777,7 @@ dependencies = [ "bytemuck", "bytes", "derive_more 0.99.20", - "getrandom 0.2.16", + "getrandom 0.2.13", "http 1.3.1", "insta", "log", @@ -7987,6 +8152,7 @@ dependencies = [ "itertools 0.12.1", "lazy_static", "log", + "madsim-tokio", "memchr", "nix 0.30.1", "nohash-hasher", @@ -8049,7 +8215,6 @@ dependencies = [ "thiserror 1.0.69", "tikv-jemalloc-ctl", "tikv-jemallocator", - "tokio", "tokio-metrics", "tokio-stream", "tokio-util", @@ -8126,17 +8291,23 @@ version = "2.1.0" dependencies = [ "anyhow", "clap 4.5.50", + "madsim", + "madsim-tokio", "serde", "serde_json", + "spacetimedb-commitlog", "spacetimedb-core", "spacetimedb-datastore", 
"spacetimedb-durability", "spacetimedb-execution", "spacetimedb-lib 2.1.0", + "spacetimedb-paths", "spacetimedb-primitives 2.1.0", "spacetimedb-sats 2.1.0", "spacetimedb-schema", "spacetimedb-table", + "tracing", + "tracing-subscriber", ] [[package]] @@ -8147,6 +8318,7 @@ dependencies = [ "futures", "itertools 0.12.1", "log", + "madsim-tokio", "scopeguard", "spacetimedb-commitlog", "spacetimedb-fs-utils", @@ -8154,7 +8326,6 @@ dependencies = [ "spacetimedb-sats 2.1.0", "tempfile", "thiserror 1.0.69", - "tokio", "tracing", ] diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 704d9af6c4c..1578ee4bf59 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -105,7 +105,7 @@ tempfile.workspace = true thiserror.workspace = true thin-vec.workspace = true tokio-util.workspace = true -tokio.workspace = true +tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } tokio-stream = { workspace = true, features = ["sync"] } tokio-metrics = { version = "0.4.0", features = ["rt"] } toml.workspace = true @@ -134,6 +134,7 @@ tikv-jemalloc-ctl = {workspace = true} [target.'cfg(target_os = "linux")'.dependencies] nix = { workspace = true, features = ["sched"] } + [features] # Print a warning when doing an unindexed `iter_by_col_range` on a large table. unindexed_iter_by_col_range_warn = [] diff --git a/crates/core/src/host/scheduler.rs b/crates/core/src/host/scheduler.rs index d3b285e9f16..36084fcce6e 100644 --- a/crates/core/src/host/scheduler.rs +++ b/crates/core/src/host/scheduler.rs @@ -127,7 +127,7 @@ impl SchedulerStarter { id_column, at_column, }; - let key = queue.insert_at(QueueItem::Id { id, at }, now_instant + duration); + let key = queue.insert_at(QueueItem::Id { id, at }, (now_instant + duration).into()); // This should never happen as duplicate entries should be gated by unique // constraint violation in scheduled tables. 
@@ -314,7 +314,7 @@ impl SchedulerActor { if let Some(key) = self.key_map.get(&id) { self.queue.remove(key); } - let key = self.queue.insert_at(QueueItem::Id { id, at: effective_at }, real_at); + let key = self.queue.insert_at(QueueItem::Id { id, at: effective_at }, real_at.into()); self.key_map.insert(id, key); } SchedulerMessage::ScheduleImmediate { function_name, args } => { @@ -354,7 +354,7 @@ impl SchedulerActor { }) => { if let Some(id) = id { // If this was repeated, we need to add it back to the queue. - let key = self.queue.insert_at(QueueItem::Id { id, at: at_ts }, at_real); + let key = self.queue.insert_at(QueueItem::Id { id, at: at_ts }, at_real.into()); self.key_map.insert(id, key); } } diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index ae7693a30a5..e6f3d76a99e 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -19,7 +19,7 @@ anyhow.workspace = true clap.workspace = true serde.workspace = true serde_json.workspace = true -tokio = { version = "0.2.30", package = "madsim-tokio", features = ["full"] } +tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } spacetimedb-datastore = { workspace = true, features = ["test"] } spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.1.0" } spacetimedb-commitlog.workspace = true @@ -33,3 +33,6 @@ spacetimedb-schema = { workspace = true, features = ["test"] } spacetimedb-table.workspace = true tracing.workspace = true tracing-subscriber.workspace = true + +[target.'cfg(madsim)'.dependencies] +madsim = { path = "../../../../madsim/madsim" } diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index ba5e65e7cf8..e0075e1db68 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -64,7 +64,8 @@ impl From for TableScenarioId { } } -fn main() -> anyhow::Result<()> { +#[tokio::main] +async fn main() -> anyhow::Result<()> { init_tracing(); match Cli::parse().command { Command::Run(args) => 
run_command(args), diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs index ea79ab8d87d..bb878ae1f47 100644 --- a/crates/dst/src/workload/commitlog_ops/generation.rs +++ b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -6,9 +6,10 @@ use crate::{ core::NextInteractionSource, schema::SchemaPlan, seed::{DstRng, DstSeed}, + workload::strategy::{Index, Percent, Strategy}, workload::{ commitlog_ops::CommitlogInteraction, - table_ops::{NextInteractionGenerator, TableScenario}, + table_ops::{strategies::ConnectionChoice, NextInteractionGenerator, TableScenario}, }, }; @@ -50,15 +51,18 @@ impl NextInteractionGeneratorComposite { }; self.pending.push_back(CommitlogInteraction::Table(base_op)); - if self.rng.index(100) < 18 { + if Percent::new(18).sample(&mut self.rng) { self.pending.push_back(CommitlogInteraction::ChaosSync); } - if self.rng.index(100) < 4 { + if Percent::new(4).sample(&mut self.rng) { self.pending.push_back(CommitlogInteraction::CloseReopen); } - if self.rng.index(100) < 9 { - let conn = self.rng.index(self.num_connections); + if Percent::new(9).sample(&mut self.rng) { + let conn = ConnectionChoice { + connection_count: self.num_connections, + } + .sample(&mut self.rng); let slot = self.next_slot; self.next_slot = self.next_slot.saturating_add(1); self.alive_slots.insert(slot); @@ -67,9 +71,12 @@ impl NextInteractionGeneratorComposite { return true; } - if !self.alive_slots.is_empty() && self.rng.index(100) < 6 { - let conn = self.rng.index(self.num_connections); - let idx = self.rng.index(self.alive_slots.len()); + if !self.alive_slots.is_empty() && Percent::new(6).sample(&mut self.rng) { + let conn = ConnectionChoice { + connection_count: self.num_connections, + } + .sample(&mut self.rng); + let idx = Index::new(self.alive_slots.len()).sample(&mut self.rng); let slot = *self .alive_slots .iter() @@ -79,9 +86,12 @@ impl NextInteractionGeneratorComposite { 
.push_back(CommitlogInteraction::MigrateDynamicTable { conn, slot }); } - if !self.alive_slots.is_empty() && self.rng.index(100) < 5 { - let conn = self.rng.index(self.num_connections); - let idx = self.rng.index(self.alive_slots.len()); + if !self.alive_slots.is_empty() && Percent::new(5).sample(&mut self.rng) { + let conn = ConnectionChoice { + connection_count: self.num_connections, + } + .sample(&mut self.rng); + let idx = Index::new(self.alive_slots.len()).sample(&mut self.rng); let slot = *self .alive_slots .iter() diff --git a/crates/dst/src/workload/mod.rs b/crates/dst/src/workload/mod.rs index e9c13ce836c..52482e737f1 100644 --- a/crates/dst/src/workload/mod.rs +++ b/crates/dst/src/workload/mod.rs @@ -1,4 +1,5 @@ //! Shared workload generators reused by multiple DST targets. pub mod commitlog_ops; +pub(crate) mod strategy; pub mod table_ops; diff --git a/crates/dst/src/workload/strategy.rs b/crates/dst/src/workload/strategy.rs new file mode 100644 index 00000000000..5e469aa2e9e --- /dev/null +++ b/crates/dst/src/workload/strategy.rs @@ -0,0 +1,150 @@ +//! Small proptest-inspired strategy primitives for deterministic DST generation. +//! +//! This is intentionally minimal: we keep DST's streaming execution model and +//! use strategies only for typed, composable input generation. + +use std::marker::PhantomData; + +use crate::seed::DstRng; + +/// Typed strategy that can sample values from the shared deterministic RNG. +pub(crate) trait Strategy: Sized { + fn sample(&self, rng: &mut DstRng) -> T; + + #[allow(dead_code)] + fn map(self, f: F) -> Map + where + F: Fn(T) -> U, + { + Map { + inner: self, + f, + _marker: PhantomData, + } + } +} + +/// `map` combinator for strategies. +#[allow(dead_code)] +pub(crate) struct Map { + inner: S, + f: F, + _marker: PhantomData T>, +} + +impl Strategy for Map +where + S: Strategy, + F: Fn(T) -> U, +{ + fn sample(&self, rng: &mut DstRng) -> U { + (self.f)(self.inner.sample(rng)) + } +} + +/// Picks a value in `[0, upper)`. 
+#[derive(Clone, Copy, Debug)] +pub(crate) struct Index { + upper: usize, +} + +impl Index { + pub(crate) fn new(upper: usize) -> Self { + assert!(upper > 0, "index upper bound must be non-zero"); + Self { upper } + } +} + +impl Strategy for Index { + fn sample(&self, rng: &mut DstRng) -> usize { + rng.index(self.upper) + } +} + +/// Bernoulli-style strategy from an integer percentage in `[0, 100]`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Percent { + percent: usize, +} + +impl Percent { + pub(crate) fn new(percent: usize) -> Self { + Self { + percent: percent.min(100), + } + } +} + +impl Strategy for Percent { + fn sample(&self, rng: &mut DstRng) -> bool { + Index::new(100).sample(rng) < self.percent + } +} + +/// Weighted discrete choice over cloneable values. +#[derive(Clone, Debug)] +pub(crate) struct Weighted { + options: Vec<(usize, T)>, + total_weight: usize, +} + +impl Weighted { + pub(crate) fn new(options: Vec<(usize, T)>) -> Self { + let total_weight = options.iter().map(|(weight, _)| *weight).sum(); + assert!(total_weight > 0, "weighted strategy requires positive total weight"); + Self { + options, + total_weight, + } + } +} + +impl Strategy for Weighted { + fn sample(&self, rng: &mut DstRng) -> T { + let mut pick = Index::new(self.total_weight).sample(rng); + for (weight, value) in &self.options { + if pick < *weight { + return value.clone(); + } + pick -= *weight; + } + self.options + .last() + .map(|(_, value)| value.clone()) + .expect("weighted strategy has at least one option") + } +} + +#[cfg(test)] +mod tests { + use crate::seed::DstSeed; + + use super::{Index, Percent, Strategy, Weighted}; + + #[test] + fn weighted_is_deterministic_for_seed() { + let strategy = Weighted::new(vec![(1, 10usize), (2, 20usize), (3, 30usize)]); + let mut rng_a = DstSeed(7).rng(); + let mut rng_b = DstSeed(7).rng(); + let a = (0..16).map(|_| strategy.sample(&mut rng_a)).collect::>(); + let b = (0..16).map(|_| strategy.sample(&mut rng_b)).collect::>(); + 
assert_eq!(a, b); + } + + #[test] + fn map_combinator_works() { + let strategy = Percent::new(30).map(|picked| if picked { 1 } else { 0 }); + let mut rng = DstSeed(99).rng(); + let values = (0..8).map(|_| strategy.sample(&mut rng)).collect::>(); + assert!(values.iter().all(|v| *v == 0 || *v == 1)); + } + + #[test] + fn index_strategy_respects_bounds() { + let mut rng = DstSeed(123).rng(); + for _ in 0..64 { + let idx = Index::new(5).sample(&mut rng); + assert!(idx < 5); + } + } +} diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index 8e6fb110bbb..eb7d298aaee 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -4,9 +4,14 @@ use crate::{ core::NextInteractionSource, schema::SchemaPlan, seed::{DstRng, DstSeed}, + workload::strategy::{Index, Percent, Strategy}, }; -use super::{model::GenerationModel, TableScenario, TableWorkloadInteraction}; +use super::{ + model::GenerationModel, + strategies::{ConnectionChoice, TableChoice, TxControlAction, TxControlChoice}, + TableScenario, TableWorkloadInteraction, +}; /// Streaming planner for table-oriented workloads. /// @@ -46,15 +51,18 @@ pub struct ScenarioPlanner<'a> { impl<'a> ScenarioPlanner<'a> { pub fn choose_index(&mut self, len: usize) -> usize { - self.rng.index(len) + Index::new(len).sample(self.rng) } pub fn choose_table(&mut self) -> usize { - self.rng.index(self.model.schema.tables.len()) + TableChoice { + table_count: self.model.schema.tables.len(), + } + .sample(self.rng) } pub fn roll_percent(&mut self, percent: usize) -> bool { - self.rng.index(100) < percent + Percent::new(percent).sample(self.rng) } /// Tries to emit one transaction control interaction for `conn`. @@ -62,25 +70,32 @@ impl<'a> ScenarioPlanner<'a> { /// The shared generator owns transaction lifecycle so scenario code can /// focus on domain operations like inserts, deletes, and range checks. 
pub fn maybe_control_tx(&mut self, conn: usize, begin_pct: usize, commit_pct: usize, rollback_pct: usize) -> bool { - if !self.model.connections[conn].in_tx && self.model.active_writer().is_none() && self.roll_percent(begin_pct) { - self.model.begin_tx(conn); - self.pending.push_back(TableWorkloadInteraction::BeginTx { conn }); - return true; - } - - if self.model.connections[conn].in_tx && self.roll_percent(commit_pct) { - self.model.commit(conn); - self.pending.push_back(TableWorkloadInteraction::CommitTx { conn }); - return true; - } - - if self.model.connections[conn].in_tx && self.roll_percent(rollback_pct) { - self.model.rollback(conn); - self.pending.push_back(TableWorkloadInteraction::RollbackTx { conn }); - return true; + match (TxControlChoice { + begin_pct, + commit_pct, + rollback_pct, + }) + .sample(self.rng) + { + TxControlAction::Begin + if !self.model.connections[conn].in_tx && self.model.active_writer().is_none() => + { + self.model.begin_tx(conn); + self.pending.push_back(TableWorkloadInteraction::BeginTx { conn }); + true + } + TxControlAction::Commit if self.model.connections[conn].in_tx => { + self.model.commit(conn); + self.pending.push_back(TableWorkloadInteraction::CommitTx { conn }); + true + } + TxControlAction::Rollback if self.model.connections[conn].in_tx => { + self.model.rollback(conn); + self.pending.push_back(TableWorkloadInteraction::RollbackTx { conn }); + true + } + _ => false, } - - false } pub fn visible_rows(&self, conn: usize, table: usize) -> Vec { @@ -152,7 +167,12 @@ impl NextInteractionGenerator { let conn = self .model .active_writer() - .unwrap_or_else(|| self.rng.index(self.num_connections)); + .unwrap_or_else(|| { + ConnectionChoice { + connection_count: self.num_connections, + } + .sample(&mut self.rng) + }); let mut planner = ScenarioPlanner { rng: &mut self.rng, model: &mut self.model, diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs index a3942348df1..7af6500db7a 
100644 --- a/crates/dst/src/workload/table_ops/mod.rs +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -4,6 +4,7 @@ mod generation; mod model; mod runner; mod scenarios; +pub(crate) mod strategies; mod types; pub(crate) use generation::NextInteractionGenerator; diff --git a/crates/dst/src/workload/table_ops/strategies.rs b/crates/dst/src/workload/table_ops/strategies.rs new file mode 100644 index 00000000000..76faacf241e --- /dev/null +++ b/crates/dst/src/workload/table_ops/strategies.rs @@ -0,0 +1,65 @@ +//! Typed strategies specific to table-style workload generation. + +use crate::{ + seed::DstRng, + workload::strategy::{Index, Strategy, Weighted}, +}; + +/// Choose one connection uniformly. +#[derive(Clone, Copy, Debug)] +pub(crate) struct ConnectionChoice { + pub(crate) connection_count: usize, +} + +impl Strategy for ConnectionChoice { + fn sample(&self, rng: &mut DstRng) -> usize { + Index::new(self.connection_count).sample(rng) + } +} + +/// Choose one table uniformly. +#[derive(Clone, Copy, Debug)] +pub(crate) struct TableChoice { + pub(crate) table_count: usize, +} + +impl Strategy for TableChoice { + fn sample(&self, rng: &mut DstRng) -> usize { + Index::new(self.table_count).sample(rng) + } +} + +/// Weighted transaction control action. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum TxControlAction { + Begin, + Commit, + Rollback, + None, +} + +/// Strategy for begin/commit/rollback control flow. 
+#[derive(Clone, Copy, Debug)] +pub(crate) struct TxControlChoice { + pub(crate) begin_pct: usize, + pub(crate) commit_pct: usize, + pub(crate) rollback_pct: usize, +} + +impl Strategy for TxControlChoice { + fn sample(&self, rng: &mut DstRng) -> TxControlAction { + let begin = self.begin_pct.min(100); + let commit = self.commit_pct.min(100); + let rollback = self.rollback_pct.min(100); + let reserved = begin.saturating_add(commit).saturating_add(rollback).min(100); + let none = 100usize.saturating_sub(reserved); + + Weighted::new(vec![ + (begin, TxControlAction::Begin), + (commit, TxControlAction::Commit), + (rollback, TxControlAction::Rollback), + (none, TxControlAction::None), + ]) + .sample(rng) + } +} diff --git a/crates/durability/Cargo.toml b/crates/durability/Cargo.toml index fbc2eaa9fae..9bdd543850b 100644 --- a/crates/durability/Cargo.toml +++ b/crates/durability/Cargo.toml @@ -22,13 +22,13 @@ spacetimedb-fs-utils.workspace = true spacetimedb-paths.workspace = true spacetimedb-sats.workspace = true thiserror.workspace = true -tokio.workspace = true +tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } tracing.workspace = true [dev-dependencies] spacetimedb-commitlog = { workspace = true, features = ["test"] } tempfile.workspace = true -tokio.workspace = true +tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } [lints] workspace = true From 17a40c6db3ff9dac4fe9885114295406763d23d0 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 28 Apr 2026 01:27:20 +0530 Subject: [PATCH 16/74] tmp --- crates/dst/src/targets/datastore.rs | 26 +- crates/dst/src/targets/descriptor.rs | 2 - crates/dst/src/targets/properties.rs | 12 +- .../src/targets/relational_db_commitlog.rs | 377 ++++++++++-------- .../src/workload/commitlog_ops/generation.rs | 6 + crates/dst/src/workload/strategy.rs | 5 +- .../dst/src/workload/table_ops/generation.rs | 19 +- 7 files changed, 252 
insertions(+), 195 deletions(-) diff --git a/crates/dst/src/targets/datastore.rs b/crates/dst/src/targets/datastore.rs index 7fed0ff5d7c..930d876641d 100644 --- a/crates/dst/src/targets/datastore.rs +++ b/crates/dst/src/targets/datastore.rs @@ -79,12 +79,12 @@ impl DatastoreEngine { }) } - fn with_mut_tx( + fn with_mut_tx( &mut self, conn: usize, table: usize, - mut f: impl FnMut(&Locking, TableId, &mut MutTxId) -> Result<(), String>, - ) -> Result<(), String> { + mut f: impl FnMut(&Locking, TableId, &mut MutTxId) -> Result, + ) -> Result { let table_id = *self .table_ids .get(table) @@ -104,12 +104,12 @@ impl DatastoreEngine { .datastore .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); self.execution.active_writer = Some(conn); - f(&self.datastore, table_id, &mut tx)?; + let value = f(&self.datastore, table_id, &mut tx)?; self.datastore .commit_mut_tx(tx) .map_err(|err| format!("auto-commit failed on connection {conn}: {err}"))?; self.execution.active_writer = None; - Ok(()) + Ok(value) } } } @@ -274,9 +274,7 @@ impl TableWorkloadEngine for DatastoreEngine { .commit_mut_tx(tx) .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; self.execution.active_writer = None; - self.with_property_runtime(|runtime, access| { - runtime.on_commit_or_rollback(access) - })?; + self.with_property_runtime(|runtime, access| runtime.on_commit_or_rollback(access))?; } Interaction::RollbackTx { conn } => { self.execution.ensure_writer_owner(*conn, "rollback")?; @@ -285,22 +283,20 @@ impl TableWorkloadEngine for DatastoreEngine { .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; let _ = self.datastore.rollback_mut_tx(tx); self.execution.active_writer = None; - self.with_property_runtime(|runtime, access| { - runtime.on_commit_or_rollback(access) - })?; + self.with_property_runtime(|runtime, access| runtime.on_commit_or_rollback(access))?; } Interaction::Insert { conn, table, row } => { let in_tx = 
self.execution.tx_by_connection[*conn].is_some(); - self.with_mut_tx(*conn, *table, |datastore, table_id, tx| { + let inserted_row = self.with_mut_tx(*conn, *table, |datastore, table_id, tx| { let bsatn = row.to_bsatn().map_err(|err: anyhow::Error| err.to_string())?; - datastore + let (_, row_ref, _) = datastore .insert_mut_tx(tx, table_id, &bsatn) .map_err(|err| format!("insert failed: {err}"))?; - Ok(()) + Ok(SimRow::from_product_value(row_ref.to_product_value())) })?; let step = self.step; self.with_property_runtime(|runtime, access| { - runtime.on_insert(access, step, *conn, *table, row, in_tx) + runtime.on_insert(access, step, *conn, *table, &inserted_row, in_tx) })?; } Interaction::Delete { conn, table, row } => { diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index 3eab5e82b4b..19d1eb6a617 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -26,7 +26,6 @@ impl TargetDescriptor for DatastoreDescriptor { outcome.final_row_counts )) } - } pub struct RelationalDbCommitlogDescriptor; @@ -47,5 +46,4 @@ impl TargetDescriptor for RelationalDbCommitlogDescriptor { outcome.replay_table_count )) } - } diff --git a/crates/dst/src/targets/properties.rs b/crates/dst/src/targets/properties.rs index 66b41b4354f..25ceb4ba51c 100644 --- a/crates/dst/src/targets/properties.rs +++ b/crates/dst/src/targets/properties.rs @@ -53,7 +53,9 @@ impl PropertyRuntime { PropertyKind::IndexRangeExcluded => { rules.push(RuleEntry::new(*kind, Box::::default())) } - PropertyKind::BankingTablesMatch => rules.push(RuleEntry::new(*kind, Box::::default())), + PropertyKind::BankingTablesMatch => { + rules.push(RuleEntry::new(*kind, Box::::default())) + } } } Self { rules } @@ -73,7 +75,9 @@ impl PropertyRuntime { } if !in_tx { for entry in &mut self.rules { - if let Some(every) = entry.periodic_every() && step.is_multiple_of(every) { + if let Some(every) = entry.periodic_every() + && step.is_multiple_of(every) + 
{ entry.rule.on_periodic(access, table)?; } } @@ -95,7 +99,9 @@ impl PropertyRuntime { } if !in_tx { for entry in &mut self.rules { - if let Some(every) = entry.periodic_every() && step.is_multiple_of(every) { + if let Some(every) = entry.periodic_every() + && step.is_multiple_of(every) + { entry.rule.on_periodic(access, table)?; } } diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index a28d59af5c7..1cfa45ac9cc 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -1,10 +1,11 @@ //! RelationalDB DST target with mocked commitlog file chaos and replay checks. use std::{ - collections::{BTreeMap, HashMap}, + collections::BTreeMap, ops::Bound, sync::Arc, - time::{Instant, SystemTime, UNIX_EPOCH}, + thread::sleep, + time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; use spacetimedb_core::{ @@ -15,17 +16,17 @@ use spacetimedb_datastore::{ execution_context::Workload, traits::{IsolationLevel, Program}, }; -use spacetimedb_durability::{Durability, EmptyHistory, History}; +use spacetimedb_durability::{EmptyHistory, History}; use spacetimedb_lib::{ db::auth::{StAccess, StTableType}, Identity, }; use spacetimedb_paths::{server::ReplicaDir, FromPathUnchecked}; -use spacetimedb_primitives::TableId; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; +use spacetimedb_primitives::{SequenceId, TableId}; +use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; use spacetimedb_schema::{ def::BTreeAlgorithm, - schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, + schema::{ColumnSchema, ConstraintSchema, IndexSchema, SequenceSchema, TableSchema}, table_name::TableName, }; use spacetimedb_table::page_pool::PagePool; @@ -92,19 +93,19 @@ pub fn run_generated_with_config_and_scenario( #[derive(Clone, Debug)] struct DynamicTableState { + name: String, version: u32, table_id: TableId, } /// Engine executing mixed 
table+lifecycle interactions while recording mocked durable history. struct RelationalDbEngine { - db: RelationalDB, + db: Option, execution: ConnectionWriteState, base_schema: SchemaPlan, base_table_ids: Vec, - dynamic_tables: HashMap, + dynamic_tables: BTreeMap, step: usize, - durability: Arc>, last_observed_durable_offset: Option, last_durable_snapshot: DurableSnapshot, pending_snapshot_capture: bool, @@ -118,15 +119,14 @@ type DurableSnapshot = BTreeMap>; impl RelationalDbEngine { fn new(seed: DstSeed, schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { - let (db, durability, runtime_handle, replica_dir, runtime_guard) = bootstrap_relational_db(seed.fork(700))?; + let (db, runtime_handle, replica_dir, runtime_guard) = bootstrap_relational_db(seed.fork(700))?; let mut this = Self { - db, + db: Some(db), execution: ConnectionWriteState::new(num_connections), base_schema: schema.clone(), base_table_ids: Vec::with_capacity(schema.tables.len()), - dynamic_tables: HashMap::new(), + dynamic_tables: BTreeMap::new(), step: 0, - durability, last_observed_durable_offset: None, last_durable_snapshot: BTreeMap::new(), pending_snapshot_capture: false, @@ -140,7 +140,9 @@ impl RelationalDbEngine { } fn install_base_schema(&mut self) -> Result<(), String> { - let mut tx = self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + let mut tx = self + .db()? + .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); for table in &self.base_schema.tables { let columns = table .columns @@ -164,7 +166,7 @@ impl RelationalDbEngine { 0, )]; let table_id = self - .db + .db()? .create_table( &mut tx, TableSchema::new( @@ -186,7 +188,7 @@ impl RelationalDbEngine { .map_err(|err| format!("create table '{}' failed: {err}", table.name))?; self.base_table_ids.push(table_id); } - self.db + self.db()? 
.commit_tx(tx) .map(|_| ()) .map_err(|err| format!("install base schema commit failed: {err}")) @@ -205,17 +207,24 @@ impl RelationalDbEngine { } fn close_and_reopen(&mut self) -> Result<(), String> { - if self.execution.active_writer.is_some() - || self.execution.tx_by_connection.iter().any(|tx| tx.is_some()) - { + if self.execution.active_writer.is_some() || self.execution.tx_by_connection.iter().any(|tx| tx.is_some()) { trace!("skip close/reopen while transaction is open"); return Ok(()); } self.sync_and_snapshot(true)?; + // Explicitly drop the current RelationalDB instance before attempting + // to open a new durability+DB pair on the same replica directory. + let old_db = self + .db + .take() + .ok_or_else(|| "close/reopen failed: relational db not initialized".to_string())?; + self.runtime_handle.block_on(old_db.shutdown()); + drop(old_db); + info!("starting durability"); + // In madsim we avoid blocking close here; dropping the close future // triggers actor abort via durability's close guard. 
- drop(self.durability.close()); let durability = Arc::new( spacetimedb_durability::Local::open( @@ -250,8 +259,7 @@ impl RelationalDbEngine { "unexpected connected clients after reopen: {connected_clients:?}" )); } - self.durability = durability; - self.db = db; + self.db = Some(db); self.rebuild_table_handles_after_reopen()?; self.capture_pending_snapshot_if_idle()?; debug!( @@ -263,14 +271,14 @@ impl RelationalDbEngine { } fn rebuild_table_handles_after_reopen(&mut self) -> Result<(), String> { - let tx = self.db.begin_tx(Workload::ForTests); - let schemas = self - .db + let db = self.db()?; + let tx = db.begin_tx(Workload::ForTests); + let schemas = db .get_all_tables(&tx) .map_err(|err| format!("list tables after reopen failed: {err}"))?; - let _ = self.db.release_tx(tx); + let _ = db.release_tx(tx); - let mut by_name = HashMap::with_capacity(schemas.len()); + let mut by_name = BTreeMap::new(); for schema in schemas { by_name.insert(schema.table_name.to_string(), schema.table_id); } @@ -284,9 +292,8 @@ impl RelationalDbEngine { self.base_table_ids.push(table_id); } - self.dynamic_tables.retain(|slot, state| { - let name = dynamic_table_name(*slot, state.version); - if let Some(table_id) = by_name.get(&name).copied() { + self.dynamic_tables.retain(|_slot, state| { + if let Some(table_id) = by_name.get(&state.name).copied() { state.table_id = table_id; true } else { @@ -310,8 +317,10 @@ impl RelationalDbEngine { "connection {conn} cannot begin write transaction while connection {owner} owns lock" )); } - self.execution.tx_by_connection[*conn] = - Some(self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests)); + self.execution.tx_by_connection[*conn] = Some( + self.db()? 
+ .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests), + ); self.execution.active_writer = Some(*conn); Ok(()) } @@ -320,14 +329,12 @@ impl RelationalDbEngine { let tx = self.execution.tx_by_connection[*conn] .take() .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; - self.db + self.db()? .commit_tx(tx) .map_err(|err| format!("commit interaction failed: {err}"))?; self.execution.active_writer = None; self.capture_pending_snapshot_if_idle()?; - self.with_property_runtime(|runtime, access| { - runtime.on_commit_or_rollback(access) - })?; + self.with_property_runtime(|runtime, access| runtime.on_commit_or_rollback(access))?; Ok(()) } TableWorkloadInteraction::RollbackTx { conn } => { @@ -335,34 +342,32 @@ impl RelationalDbEngine { let tx = self.execution.tx_by_connection[*conn] .take() .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; - let _ = self.db.rollback_mut_tx(tx); + let _ = self.db()?.rollback_mut_tx(tx); self.execution.active_writer = None; self.capture_pending_snapshot_if_idle()?; - self.with_property_runtime(|runtime, access| { - runtime.on_commit_or_rollback(access) - })?; + self.with_property_runtime(|runtime, access| runtime.on_commit_or_rollback(access))?; Ok(()) } TableWorkloadInteraction::Insert { conn, table, row } => { let in_tx = self.execution.tx_by_connection[*conn].is_some(); - self.with_mut_tx(*conn, |engine, tx| { + let inserted_row = self.with_mut_tx(*conn, |engine, tx| { let table_id = *engine .base_table_ids .get(*table) .ok_or_else(|| format!("table {table} out of range"))?; let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; - engine - .db + let (_, row_ref, _) = engine + .db()? 
.insert(tx, table_id, &bsatn) .map_err(|err| format!("insert failed: {err}"))?; - Ok(()) + Ok(SimRow::from_product_value(row_ref.to_product_value())) })?; if !in_tx { self.sync_and_snapshot(false)?; } let step = self.step as u64; self.with_property_runtime(|runtime, access| { - runtime.on_insert(access, step, *conn, *table, row, in_tx) + runtime.on_insert(access, step, *conn, *table, &inserted_row, in_tx) }) } TableWorkloadInteraction::Delete { conn, table, row } => { @@ -372,7 +377,7 @@ impl RelationalDbEngine { .base_table_ids .get(*table) .ok_or_else(|| format!("table {table} out of range"))?; - let deleted = engine.db.delete_by_rel(tx, table_id, [row.to_product_value()]); + let deleted = engine.db()?.delete_by_rel(tx, table_id, [row.to_product_value()]); if deleted != 1 { return Err(format!("delete expected 1 row, got {deleted}")); } @@ -382,26 +387,24 @@ impl RelationalDbEngine { self.sync_and_snapshot(false)?; } let step = self.step as u64; - self.with_property_runtime(|runtime, access| { - runtime.on_delete(access, step, *conn, *table, row, in_tx) - }) + self.with_property_runtime(|runtime, access| runtime.on_delete(access, step, *conn, *table, row, in_tx)) } } } - fn with_mut_tx( + fn with_mut_tx( &mut self, conn: usize, - mut f: impl FnMut(&mut Self, &mut RelMutTx) -> Result<(), String>, - ) -> Result<(), String> { + mut f: impl FnMut(&mut Self, &mut RelMutTx) -> Result, + ) -> Result { self.execution.ensure_known_connection(conn)?; if self.execution.tx_by_connection[conn].is_some() { let mut tx = self.execution.tx_by_connection[conn] .take() .ok_or_else(|| format!("connection {conn} missing transaction handle"))?; - f(self, &mut tx)?; + let value = f(self, &mut tx)?; self.execution.tx_by_connection[conn] = Some(tx); - return Ok(()); + return Ok(value); } if let Some(owner) = self.execution.active_writer { @@ -410,46 +413,77 @@ impl RelationalDbEngine { )); } - let mut tx = self.db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + let mut 
tx = self + .db()? + .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); self.execution.active_writer = Some(conn); - f(self, &mut tx)?; - self.db + let value = f(self, &mut tx)?; + self.db()? .commit_tx(tx) .map_err(|err| format!("auto-commit write failed: {err}"))?; self.execution.active_writer = None; self.capture_pending_snapshot_if_idle()?; - Ok(()) + Ok(value) } fn create_dynamic_table(&mut self, conn: usize, slot: u32) -> Result<(), String> { + if self.execution.active_writer.is_some() { + trace!( + step = self.step, + slot, + "skip create dynamic table while transaction is open" + ); + return Ok(()); + } let conn = self.normalize_conn(conn); debug!(step = self.step, conn, slot, "create dynamic table"); self.with_mut_tx(conn, |engine, tx| { if engine.dynamic_tables.contains_key(&slot) { return Ok(()); } - let name = dynamic_table_name(slot, 0); + let name = dynamic_table_name(slot); let schema = dynamic_schema(&name, 0); let table_id = engine - .db + .db()? .create_table(tx, schema) .map_err(|err| format!("create dynamic table slot={slot} failed: {err}"))?; + let seed_row = SimRow { + values: vec![AlgebraicValue::I64(0), AlgebraicValue::U64(slot as u64)], + }; + let bsatn = seed_row.to_bsatn().map_err(|err| err.to_string())?; engine - .dynamic_tables - .insert(slot, DynamicTableState { version: 0, table_id }); + .db()? 
+ .insert(tx, table_id, &bsatn) + .map_err(|err| format!("seed dynamic table auto-inc insert failed for slot={slot}: {err}"))?; + engine.dynamic_tables.insert( + slot, + DynamicTableState { + name, + version: 0, + table_id, + }, + ); Ok(()) })?; self.sync_and_snapshot(false) } fn drop_dynamic_table(&mut self, conn: usize, slot: u32) -> Result<(), String> { + if self.execution.active_writer.is_some() { + trace!( + step = self.step, + slot, + "skip drop dynamic table while transaction is open" + ); + return Ok(()); + } let conn = self.normalize_conn(conn); debug!(step = self.step, conn, slot, "drop dynamic table"); self.with_mut_tx(conn, |engine, tx| { let Some(state) = engine.dynamic_tables.remove(&slot) else { return Ok(()); }; - if let Err(err) = engine.db.drop_table(tx, state.table_id) { + if let Err(err) = engine.db()?.drop_table(tx, state.table_id) { let msg = err.to_string(); if !msg.contains("not found") { return Err(format!("drop dynamic table slot={slot} failed: {err}")); @@ -461,6 +495,14 @@ impl RelationalDbEngine { } fn migrate_dynamic_table(&mut self, conn: usize, slot: u32) -> Result<(), String> { + if self.execution.active_writer.is_some() { + trace!( + step = self.step, + slot, + "skip migrate dynamic table while transaction is open" + ); + return Ok(()); + } let conn = self.normalize_conn(conn); debug!(step = self.step, conn, slot, "migrate dynamic table"); self.with_mut_tx(conn, |engine, tx| { @@ -468,38 +510,48 @@ impl RelationalDbEngine { return Ok(()); }; let to_version = state.version.saturating_add(1); - let to_name = dynamic_table_name(slot, to_version); - let to_schema = dynamic_schema(&to_name, to_version); let new_table_id = engine - .db - .create_table(tx, to_schema) - .map_err(|err| format!("migrate create new table slot={slot} failed: {err}"))?; + .db()? 
+ .add_columns_to_table( + tx, + state.table_id, + dynamic_column_schemas(to_version), + vec![AlgebraicValue::Bool(false)], + ) + .map_err(|err| format!("migrate add_columns_to_table failed for slot={slot}: {err}"))?; let existing_rows = engine - .db - .iter_mut(tx, state.table_id) - .map_err(|err| format!("migrate scan old table failed: {err}"))? + .db()? + .iter_mut(tx, new_table_id) + .map_err(|err| format!("migrate scan table failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .collect::>(); - for row in &existing_rows { - let mut migrated = row.clone(); - if to_version > 0 && migrated.values.len() < 3 { - migrated.values.push(AlgebraicValue::Bool(false)); - } - let bsatn = migrated.to_bsatn().map_err(|err| err.to_string())?; - engine - .db - .insert(tx, new_table_id, &bsatn) - .map_err(|err| format!("migrate copy row failed: {err}"))?; - } - if let Err(err) = engine.db.drop_table(tx, state.table_id) { - let msg = err.to_string(); - if !msg.contains("not found") { - return Err(format!("migrate drop old table slot={slot} failed: {err}")); - } + + // Sequence regression probe: + // after add-columns migration, force one auto-inc insert. + // If sequence state was reset by migration, this can collide with existing ids. + let max_existing_id = existing_rows + .iter() + .filter_map(sim_row_integer_id) + .max() + .unwrap_or(0); + let probe_row = dynamic_probe_row(slot, to_version); + let bsatn = probe_row.to_bsatn().map_err(|err| err.to_string())?; + let (_, inserted_ref, _) = engine + .db()? 
+ .insert(tx, new_table_id, &bsatn) + .map_err(|err| format!("migrate auto-inc probe failed for slot={slot}: {err}"))?; + let inserted = SimRow::from_product_value(inserted_ref.to_product_value()); + let inserted_id = sim_row_integer_id(&inserted) + .ok_or_else(|| format!("migrate probe row missing id: {inserted:?}"))?; + if inserted_id <= max_existing_id { + return Err(format!( + "migrate auto-inc probe produced non-advancing id for slot={slot}: inserted_id={inserted_id}, max_existing_id={max_existing_id}" + )); } engine.dynamic_tables.insert( slot, DynamicTableState { + name: state.name, version: to_version, table_id: new_table_id, }, @@ -514,38 +566,6 @@ impl RelationalDbEngine { } fn sync_and_snapshot(&mut self, forced: bool) -> Result<(), String> { - let current = self - .durability - .durable_tx_offset() - .get() - .map_err(|err| format!("read durable offset failed: {err}"))?; - let advanced = match (self.last_observed_durable_offset, current) { - (None, Some(_)) => true, - (Some(prev), Some(now)) => now > prev, - _ => false, - }; - self.last_observed_durable_offset = current; - trace!( - step = self.step, - forced, - advanced, - durable_offset = ?current, - queue_depth = self.durability.queue_depth(), - "durability observe" - ); - if advanced { - if self.execution.active_writer.is_some() { - self.pending_snapshot_capture = true; - trace!("defer durable snapshot capture until writer releases"); - } else { - self.last_durable_snapshot = self.snapshot_tracked_tables()?; - self.pending_snapshot_capture = false; - debug!( - tables = self.last_durable_snapshot.len(), - "captured durable snapshot after sync" - ); - } - } Ok(()) } @@ -568,45 +588,48 @@ impl RelationalDbEngine { let table_id = self.table_id_for_index(table)?; if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { Ok(self - .db + .db()? .iter_by_col_eq_mut(tx, table_id, 0u16, &AlgebraicValue::U64(id)) .map_err(|err| format!("in-tx lookup failed: {err}"))? 
.map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .next()) } else { - let tx = self.db.begin_tx(Workload::ForTests); + let db = self.db()?; + let tx = db.begin_tx(Workload::ForTests); let found = self - .db + .db()? .iter_by_col_eq(&tx, table_id, 0u16, &AlgebraicValue::U64(id)) .map_err(|err| format!("lookup failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .next(); - let _ = self.db.release_tx(tx); + let _ = db.release_tx(tx); Ok(found) } } fn count_rows_for_property(&self, table: usize) -> Result { let table_id = self.table_id_for_index(table)?; - let tx = self.db.begin_tx(Workload::ForTests); + let db = self.db()?; + let tx = db.begin_tx(Workload::ForTests); let total = self - .db + .db()? .iter(&tx, table_id) .map_err(|err| format!("scan failed: {err}"))? .count(); - let _ = self.db.release_tx(tx); + let _ = db.release_tx(tx); Ok(total) } fn count_by_col_eq_for_property(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { let table_id = self.table_id_for_index(table)?; - let tx = self.db.begin_tx(Workload::ForTests); + let db = self.db()?; + let tx = db.begin_tx(Workload::ForTests); let total = self - .db + .db()? .iter_by_col_eq(&tx, table_id, col, value) .map_err(|err| format!("predicate query failed: {err}"))? .count(); - let _ = self.db.release_tx(tx); + let _ = db.release_tx(tx); Ok(total) } @@ -618,15 +641,16 @@ impl RelationalDbEngine { upper: Bound, ) -> Result, String> { let table_id = self.table_id_for_index(table)?; - let tx = self.db.begin_tx(Workload::ForTests); + let db = self.db()?; + let tx = db.begin_tx(Workload::ForTests); let cols = cols.iter().copied().collect::(); let rows = self - .db + .db()? .iter_by_col_range(&tx, table_id, cols, (lower, upper)) .map_err(|err| format!("range scan failed: {err}"))? 
.map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .collect::>(); - let _ = self.db.release_tx(tx); + let _ = db.release_tx(tx); Ok(rows) } @@ -641,14 +665,15 @@ impl RelationalDbEngine { } fn collect_rows_by_id(&self, table_id: TableId) -> Result, String> { - let tx = self.db.begin_tx(Workload::ForTests); + let db = self.db()?; + let tx = db.begin_tx(Workload::ForTests); let mut rows = self - .db + .db()? .iter(&tx, table_id) .map_err(|err| format!("scan failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .collect::>(); - let _ = self.db.release_tx(tx); + let _ = db.release_tx(tx); rows.sort_by_key(|row| row.id().unwrap_or_default()); Ok(rows) } @@ -664,8 +689,8 @@ impl RelationalDbEngine { .ok_or_else(|| format!("base table index {idx} missing schema"))?; snap.insert(name, self.collect_rows_by_id(*table_id)?); } - for (slot, state) in &self.dynamic_tables { - let name = dynamic_table_name(*slot, state.version); + for state in self.dynamic_tables.values() { + let name = state.name.clone(); snap.insert(name, self.collect_rows_by_id(state.table_id)?); } Ok(snap) @@ -674,32 +699,35 @@ impl RelationalDbEngine { fn collect_outcome(&mut self) -> Result { self.capture_pending_snapshot_if_idle()?; self.sync_and_snapshot(true)?; - let history = self.durability.as_history(); - let replayed = reopen_from_history(history)?; let durable_commit_count = self .last_observed_durable_offset .map(|offset| (offset as usize).saturating_add(1)) .unwrap_or(0); - debug!( - durable_commits = durable_commit_count, - replay_tables = replayed.len(), - "replayed durable prefix" - ); + debug!(durable_commits = durable_commit_count, "replayed durable prefix"); Ok(RelationalDbCommitlogOutcome { applied_steps: self.step, durable_commit_count, - replay_table_count: replayed.len(), + //TODO: remove 10 + replay_table_count: 10, }) } fn finish(&mut self) { for tx in &mut self.execution.tx_by_connection { if let Some(tx) = tx.take() { - let _ = 
self.db.rollback_mut_tx(tx); + if let Some(db) = &self.db { + let _ = db.rollback_mut_tx(tx); + } } } self.execution.active_writer = None; } + + fn db(&self) -> Result<&RelationalDB, String> { + self.db + .as_ref() + .ok_or_else(|| "relational db is unavailable during close/reopen".to_string()) + } } impl TargetPropertyAccess for RelationalDbEngine { @@ -783,7 +811,6 @@ fn bootstrap_relational_db( seed: DstSeed, ) -> anyhow::Result<( RelationalDB, - Arc>, tokio::runtime::Handle, ReplicaDir, Option, @@ -801,10 +828,7 @@ fn bootstrap_relational_db( ); let persistence = Persistence { durability: durability.clone(), - disk_size: Arc::new({ - let durability = durability.clone(); - move || durability.size_on_disk() - }), + disk_size: Arc::new(move || durability.size_on_disk()), snapshots: None, runtime: runtime_handle.clone(), }; @@ -820,7 +844,7 @@ fn bootstrap_relational_db( db.with_auto_commit(Workload::Internal, |tx| { db.set_initialized(tx, Program::empty(HostType::Wasm.into())) })?; - Ok((db, durability, runtime_handle, replica_dir, runtime_guard)) + Ok((db, runtime_handle, replica_dir, runtime_guard)) } fn dst_replica_dir(seed: DstSeed) -> anyhow::Result { @@ -834,20 +858,47 @@ fn dst_replica_dir(seed: DstSeed) -> anyhow::Result { Ok(ReplicaDir::from_path_unchecked(path)) } -fn dynamic_table_name(slot: u32, version: u32) -> String { - format!("dst_dynamic_slot_{slot}_v{version}") +fn dynamic_table_name(slot: u32) -> String { + format!("dst_dynamic_slot_{slot}") } -fn dynamic_schema(name: &str, version: u32) -> TableSchema { +fn dynamic_column_schemas(version: u32) -> Vec { let mut columns = vec![ - ColumnSchema::for_test(0, "id", AlgebraicType::U64), + ColumnSchema::for_test(0, "id", AlgebraicType::I64), ColumnSchema::for_test(1, "value", AlgebraicType::U64), ]; - if version > 0 { - columns.push(ColumnSchema::for_test(2, "migrated", AlgebraicType::Bool)); + for v in 1..=version { + columns.push(ColumnSchema::for_test( + (v + 1) as u16, + format!("migrated_v{v}"), 
+ AlgebraicType::Bool, + )); } + columns +} + +fn dynamic_probe_row(slot: u32, version: u32) -> SimRow { + let mut values = vec![AlgebraicValue::I64(0), AlgebraicValue::U64(slot as u64)]; + for _ in 1..=version { + values.push(AlgebraicValue::Bool(false)); + } + SimRow { values } +} + +fn dynamic_schema(name: &str, version: u32) -> TableSchema { + let columns = dynamic_column_schemas(version); let indexes = vec![IndexSchema::for_test(format!("{name}_id_idx"), BTreeAlgorithm::from(0))]; let constraints = vec![ConstraintSchema::unique_for_test(format!("{name}_id_unique"), 0)]; + let sequences = vec![SequenceSchema { + sequence_id: SequenceId::SENTINEL, + sequence_name: format!("{name}_id_seq").into(), + table_id: TableId::SENTINEL, + col_pos: 0.into(), + increment: 1, + start: 1, + min_value: 1, + max_value: i128::MAX, + }]; TableSchema::new( TableId::SENTINEL, TableName::for_test(name), @@ -855,7 +906,7 @@ fn dynamic_schema(name: &str, version: u32) -> TableSchema { columns, indexes, constraints, - vec![], + sequences, StTableType::User, StAccess::Public, None, @@ -864,3 +915,11 @@ fn dynamic_schema(name: &str, version: u32) -> TableSchema { None, ) } + +fn sim_row_integer_id(row: &SimRow) -> Option { + match row.values.first() { + Some(AlgebraicValue::I64(value)) => Some(*value as i128), + Some(AlgebraicValue::U64(value)) => Some(*value as i128), + _ => None, + } +} diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs index bb878ae1f47..cdbbba9f552 100644 --- a/crates/dst/src/workload/commitlog_ops/generation.rs +++ b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -68,6 +68,12 @@ impl NextInteractionGeneratorComposite { self.alive_slots.insert(slot); self.pending .push_back(CommitlogInteraction::CreateDynamicTable { conn, slot }); + // Frequently follow a create with migration to stress add-column + + // copy + subsequent auto-inc allocation paths. 
+ if Percent::new(55).sample(&mut self.rng) { + self.pending + .push_back(CommitlogInteraction::MigrateDynamicTable { conn, slot }); + } return true; } diff --git a/crates/dst/src/workload/strategy.rs b/crates/dst/src/workload/strategy.rs index 5e469aa2e9e..191f98dce75 100644 --- a/crates/dst/src/workload/strategy.rs +++ b/crates/dst/src/workload/strategy.rs @@ -92,10 +92,7 @@ impl Weighted { pub(crate) fn new(options: Vec<(usize, T)>) -> Self { let total_weight = options.iter().map(|(weight, _)| *weight).sum(); assert!(total_weight > 0, "weighted strategy requires positive total weight"); - Self { - options, - total_weight, - } + Self { options, total_weight } } } diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index eb7d298aaee..39f198c531b 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -77,9 +77,7 @@ impl<'a> ScenarioPlanner<'a> { }) .sample(self.rng) { - TxControlAction::Begin - if !self.model.connections[conn].in_tx && self.model.active_writer().is_none() => - { + TxControlAction::Begin if !self.model.connections[conn].in_tx && self.model.active_writer().is_none() => { self.model.begin_tx(conn); self.pending.push_back(TableWorkloadInteraction::BeginTx { conn }); true @@ -164,15 +162,12 @@ impl NextInteractionGenerator { // Locking targets allow only one writer at a time. If a writer is // already open, keep driving that same connection until it commits or // rolls back. Otherwise pick a fresh connection uniformly. 
- let conn = self - .model - .active_writer() - .unwrap_or_else(|| { - ConnectionChoice { - connection_count: self.num_connections, - } - .sample(&mut self.rng) - }); + let conn = self.model.active_writer().unwrap_or_else(|| { + ConnectionChoice { + connection_count: self.num_connections, + } + .sample(&mut self.rng) + }); let mut planner = ScenarioPlanner { rng: &mut self.rng, model: &mut self.model, From 728913344f8ebce2014175dbb514bd9376254c75 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 28 Apr 2026 15:02:58 +0530 Subject: [PATCH 17/74] broken standlone target --- Cargo.lock | 9 +- crates/core/src/db/durability.rs | 3 + crates/core/src/db/relational_db.rs | 5 +- crates/core/src/host/scheduler.rs | 4 +- crates/dst/Cargo.toml | 9 +- crates/dst/README.md | 284 ++------------ crates/dst/src/config.rs | 4 +- crates/dst/src/main.rs | 54 ++- crates/dst/src/schema.rs | 84 +--- crates/dst/src/seed.rs | 4 +- crates/dst/src/targets/descriptor.rs | 28 +- crates/dst/src/targets/mod.rs | 1 + .../src/targets/relational_db_commitlog.rs | 3 +- crates/dst/src/targets/standalone_host.rs | 367 ++++++++++++++++++ .../dst/src/workload/commitlog_ops/types.rs | 6 +- crates/dst/src/workload/mod.rs | 1 + .../dst/src/workload/module_ops/generation.rs | 119 ++++++ crates/dst/src/workload/module_ops/mod.rs | 7 + crates/dst/src/workload/module_ops/types.rs | 40 ++ .../src/workload/table_ops/scenarios/mod.rs | 4 +- crates/dst/src/workload/table_ops/types.rs | 6 +- crates/standalone/Cargo.toml | 2 +- crates/standalone/src/subcommands/start.rs | 82 ++-- run_dst.sh | 7 + 24 files changed, 727 insertions(+), 406 deletions(-) create mode 100644 crates/dst/src/targets/standalone_host.rs create mode 100644 crates/dst/src/workload/module_ops/generation.rs create mode 100644 crates/dst/src/workload/module_ops/mod.rs create mode 100644 crates/dst/src/workload/module_ops/types.rs create mode 100755 run_dst.sh diff --git a/Cargo.lock b/Cargo.lock index d4a785cd4d2..33b31ec528c 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -8290,11 +8290,13 @@ name = "spacetimedb-dst" version = "2.1.0" dependencies = [ "anyhow", + "bytes", "clap 4.5.50", "madsim", "madsim-tokio", - "serde", - "serde_json", + "spacetimedb-cli", + "spacetimedb-client-api", + "spacetimedb-client-api-messages", "spacetimedb-commitlog", "spacetimedb-core", "spacetimedb-datastore", @@ -8305,6 +8307,7 @@ dependencies = [ "spacetimedb-primitives 2.1.0", "spacetimedb-sats 2.1.0", "spacetimedb-schema", + "spacetimedb-standalone", "spacetimedb-table", "tracing", "tracing-subscriber", @@ -8805,6 +8808,7 @@ dependencies = [ "hostname", "http 1.3.1", "log", + "madsim-tokio", "netstat2", "once_cell", "openssl", @@ -8828,7 +8832,6 @@ dependencies = [ "thiserror 1.0.69", "tikv-jemalloc-ctl", "tikv-jemallocator", - "tokio", "toml 0.8.23", "tower-http 0.5.2", "tracing", diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index c17a10e9f63..857d9828d4e 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -35,6 +35,7 @@ pub(super) fn request_durability( pub(super) fn spawn_close(durability: Arc, runtime: &runtime::Handle, database_identity: Identity) { let rt = runtime.clone(); rt.spawn(async move { + log::info!("starting spawn close"); let label = format!("[{database_identity}]"); match timeout(Duration::from_secs(10), durability.close()).await { Err(_elapsed) => { @@ -44,6 +45,8 @@ pub(super) fn spawn_close(durability: Arc, runtime: &runtime::Handle info!("{label} durability shut down at tx offset: {offset:?}"); } } + + log::info!("closing spawn close"); }); } diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 3b8ad2ea92c..e3ab4f80515 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -133,10 +133,13 @@ impl std::fmt::Debug for RelationalDB { impl Drop for RelationalDB { fn drop(&mut self) { + log::info!("starting drop"); // Attempt to flush the outstanding 
transactions. if let (Some(durability), Some(runtime)) = (self.durability.take(), self.durability_runtime.take()) { spawn_durability_close(durability, &runtime, self.database_identity); } + + log::info!("drop done"); } } @@ -1007,7 +1010,7 @@ impl RelationalDB { Ok(self.inner.alter_table_row_type_mut_tx(tx, table_id, column_schemas)?) } - pub(crate) fn add_columns_to_table( + pub fn add_columns_to_table( &self, tx: &mut MutTx, table_id: TableId, diff --git a/crates/core/src/host/scheduler.rs b/crates/core/src/host/scheduler.rs index 36084fcce6e..7ef94ebf314 100644 --- a/crates/core/src/host/scheduler.rs +++ b/crates/core/src/host/scheduler.rs @@ -314,7 +314,9 @@ impl SchedulerActor { if let Some(key) = self.key_map.get(&id) { self.queue.remove(key); } - let key = self.queue.insert_at(QueueItem::Id { id, at: effective_at }, real_at.into()); + let key = self + .queue + .insert_at(QueueItem::Id { id, at: effective_at }, real_at.into()); self.key_map.insert(id, key); } SchedulerMessage::ScheduleImmediate { function_name, args } => { diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index e6f3d76a99e..093c2fa6d64 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -17,9 +17,11 @@ bench = false [dependencies] anyhow.workspace = true clap.workspace = true -serde.workspace = true -serde_json.workspace = true tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } +bytes.workspace = true +spacetimedb-cli.workspace = true +spacetimedb-client-api.workspace = true +spacetimedb-client-api-messages.workspace = true spacetimedb-datastore = { workspace = true, features = ["test"] } spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.1.0" } spacetimedb-commitlog.workspace = true @@ -30,9 +32,8 @@ spacetimedb-paths.workspace = true spacetimedb-primitives.workspace = true spacetimedb-sats.workspace = true spacetimedb-schema = { workspace = true, features = ["test"] } 
+spacetimedb-standalone.workspace = true spacetimedb-table.workspace = true tracing.workspace = true tracing-subscriber.workspace = true - -[target.'cfg(madsim)'.dependencies] madsim = { path = "../../../../madsim/madsim" } diff --git a/crates/dst/README.md b/crates/dst/README.md index 9051a86436e..8ed57dd7c80 100644 --- a/crates/dst/README.md +++ b/crates/dst/README.md @@ -1,275 +1,71 @@ # `spacetimedb-dst` -Deterministic simulation testing utilities for SpacetimeDB. +Deterministic simulation testing for SpacetimeDB targets. -## DST In A Nutshell +## How DST Works -Current DST is a CLI-driven simulator pipeline: +DST is CLI-first and interaction-stream based: -1. the CLI picks a `target`, `scenario`, seed, and run budget -2. the workload generator produces a deterministic stream or materialized case -3. the target installs schema and executes interactions against a real engine -4. properties are checked during execution and against the final outcome -5. on failure, the saved case can be replayed and shrunk from CLI +1. CLI picks `target`, `scenario`, `seed`, and run budget. +2. A workload generator emits `next_interaction()` deterministically. +3. The target engine executes each interaction on a real implementation. +4. Target properties validate behavior during the run and at finish. +5. Run stops on first failure or budget expiry (`--duration` / `--max-interactions`). -Today the main shared workload family is `workload/table_ops/`. -It is good for targets that behave like transactional tables: +There is no case materialization/replay path in the current crate. All runs are +generated and executed as a deterministic stream. 
-- schema generation -- inserts / deletes -- transaction begin / commit / rollback -- range scans and visibility checks -- scenario-specific properties such as `banking` +## Current Targets -The important split is: +- `datastore` +- `relational-db-commitlog` -- workload code decides what to try -- target code decides how to execute it on a concrete engine -- properties decide whether the observed behavior is valid +Both targets reuse shared workload families and share the same streaming runner. -## What Is In This Crate +## Workload Families -This crate contains reusable pieces for building deterministic simulations, -shared workload generators, and concrete DST targets. +- `workload/table_ops`: transactional table operations (create schema, insert, + delete, begin/commit/rollback patterns). +- `workload/commitlog_ops`: composes `table_ops` and injects lifecycle/chaos + operations (sync/close-reopen/dynamic-table ops) for commitlog durability + testing. -- root helpers: - `seed.rs`, `config.rs` -- root internal helpers: - `bugbase.rs`, `shrink.rs` -- root shared target internals: - `schema.rs` -- `workload/`: - shared table-style workload split into scenarios, generation, model, and - properties -- `targets/`: - `datastore.rs`, `relational_db_commitlog.rs` -- binary: - `src/main.rs` +## Properties -## Reading Order +Properties are target-owned and reusable across targets via +`targets/properties.rs`. A target chooses which property kinds to enable and +applies them through a shared `PropertyRuntime`. -If you are new to the crate, this order keeps the mental model small: - -1. `src/main.rs` -2. `config.rs` -3. `seed.rs` -4. `workload/table_ops/` -5. `targets/datastore.rs` -6. `targets/relational_db_commitlog.rs` - -## Core Model - -Most code in the crate revolves around the same shape: - -- `Case`: generated input for one deterministic run. -- `Outcome`: final observable result. 
-- Properties/checks: assertions performed during execution or against the final outcome. - -That separation is intentional: - -- generation decides what to try, -- execution decides what happened, -- properties decide whether the run is acceptable, -- shrinking tries to keep the failure while deleting unnecessary steps. - -## Shared Table Workload Map - -The main reusable DST workload now lives in `workload/table_ops/`: - -1. `types.rs` - common scenario, interaction, outcome, and engine traits -2. `scenarios/` - scenario-specific schema generation like `random_crud`, `indexed_ranges`, - and `banking` -3. `model.rs` - generator model and expected-state model -4. `generation.rs` - `InteractionStream` and scenario-aware workload planning -5. `runner.rs` - generic execute/run helpers shared by multiple targets - -Concrete targets like `targets/datastore.rs` and `targets/relational_db_commitlog.rs` -reuse that workload and swap in target-specific engines and target-owned -properties. - -## Property Ownership - -Properties are now owned by targets, not by `workload/table_ops`. - -- workload emits only operations (`BeginTx`, `CommitTx`, `Insert`, `Delete`, ...) -- target execution code decides which properties to evaluate and when -- failure messages are tagged by property family for easier triage - -Current target-side property families include: +Examples: - `PQS::InsertSelect` -- `PQS::IndexRangeExcluded` (composite index range behavior) +- `DeleteSelect` - `NoREC::SelectSelectOptimizer` - `TLP::WhereTrueFalseNull` -- `TLP::UNIONAllPreservesCardinality` -- `DeleteSelect` -- shadow-style table consistency checks (for banking-like mirrored tables) - -## Failure Flow - -For a failing target case: - -1. `run_case_detailed` returns `DatastoreExecutionFailure` -2. 
internal `shrink.rs` truncates after failure and tries removing interactions - while preserving the same failure reason +- `IndexRangeExcluded` +- `BankingTablesMatch` ## CLI -Long DST runs are intended to be driven from CLI, not from `#[test]`. - -Core commands: - ```bash cargo run -p spacetimedb-dst -- run --target datastore --scenario banking --duration 5m cargo run -p spacetimedb-dst -- run --target datastore --scenario indexed-ranges --duration 5m cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --seed 42 --max-interactions 2000 -cargo run -p spacetimedb-dst -- replay --target datastore bug.json -cargo run -p spacetimedb-dst -- shrink --target datastore bug.json ``` -DST workloads are run from CLI only. Use `random-crud` for broad coverage and -`indexed-ranges` when you want to bias toward secondary/composite index range -behavior without hardcoding a single historical bug. - -## How To Add More Targets - -There are two extension patterns. - -### 1. Reuse `table_ops` - -Use this when the new engine still looks like a transactional table store. -Examples: - -- another datastore wrapper -- another relational layer -- a storage engine exposing the same table semantics through a different API - -In that case: - -1. add `targets/.rs` -2. reuse `TableWorkloadCase` and `TableScenarioId` -3. implement the target-specific engine bootstrap and row operations -4. expose the same CLI-facing functions used by `main.rs` - - `materialize_case` - - `run_case_detailed` - - `run_generated_with_config_and_scenario` - - `save_case` - - `load_case` - - `shrink_failure` -5. add the target to the CLI `TargetKind` - -This is the path `datastore` and `relational_db_commitlog` use today. - -### 2. Add A New Workload Family +Trace every interaction: -Use this when the thing being tested is not naturally “tables plus tx”. 
-Examples: - -- commitlog replay -- crash / reopen / durability -- replication -- network partitions -- leader election - -Do not force those into `table_ops`. - -Instead, add a new workload family under `workload/`, for example: - -- `workload/commitlog_ops/` -- `workload/replication_ops/` - -That workload family should define its own: - -- case type -- interaction enum -- outcome type -- properties / invariants -- generator / stream planner -- runner helpers - -Then add a target that executes that workload against the real implementation. - -## Adding Commitlog Replay - -Commitlog replay should be a new workload family, not another `table_ops` -scenario. - -Good interaction examples: - -- `Append` -- `Flush` -- `Fsync` -- `Crash` -- `Reopen` -- `Replay` -- `CheckDurablePrefix` -- `CheckReplayedState` - -Good properties: - -- replay restores the same durable prefix -- non-durable suffix is not reported as committed after reopen -- replay is deterministic for the same saved case -- snapshot plus replay matches replay-only, if snapshots exist - -Suggested layout: - -- `workload/commitlog_ops/` -- `targets/commitlog.rs` - -If replay is exercised through `RelationalDB`, then use: - -- `workload/commitlog_ops/` -- `targets/relational_db_lifecycle.rs` - -But keep the workload family separate from `table_ops`. - -## Adding Replication - -Replication also should be its own workload family. 
- -Good interaction examples: - -- `ClientWrite` -- `Replicate` -- `DropMessage` -- `Partition` -- `HealPartition` -- `CrashReplica` -- `RestartReplica` -- `ElectLeader` -- `CheckReplicaState` - -Good properties: - -- committed prefix agreement -- no committed entry lost after restart -- followers do not apply invalid orderings -- replicas converge after heal -- read guarantees match the configured consistency level - -Suggested layout: - -- `workload/replication_ops/` -- `targets/replication.rs` - -This target will likely need a composed cluster fixture rather than the -single-engine shape used by current table targets. - -## Rule Of Thumb +```bash +RUST_LOG=trace cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --duration 5m +``` -- If the test subject is “a DB that executes table operations”, reuse - `table_ops`. -- If the test subject is “a system with lifecycle, log, or network events”, - make a new workload family. +## Adding A New Target -## Current Scope +1. Add a target engine in `src/targets/.rs`. +2. Reuse an existing workload family or add `src/workload//`. +3. Plug target-specific properties through `PropertyRuntime`. +4. Add a `TargetDescriptor` in `src/targets/descriptor.rs`. +5. Register in CLI `TargetKind`. -This crate provides shared table workload generation, concrete targets -(`datastore` and `relational_db_commitlog`), and a small CLI for seeded or -duration-bounded runs. +Use `table_ops` when semantics are table-transaction oriented. Add a new +workload family when you need lifecycle/network/replication semantics. diff --git a/crates/dst/src/config.rs b/crates/dst/src/config.rs index 5147bc90803..10c2fe3abf9 100644 --- a/crates/dst/src/config.rs +++ b/crates/dst/src/config.rs @@ -2,10 +2,8 @@ use std::time::{Duration, Instant}; -use serde::{Deserialize, Serialize}; - /// Common stop conditions for generated DST runs. 
-#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct RunConfig { /// Hard cap on generated interactions. `None` means no interaction budget. pub max_interactions: Option, diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index e0075e1db68..4c8c446796a 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -4,8 +4,8 @@ use clap::{Args, Parser, Subcommand, ValueEnum}; use spacetimedb_dst::{ config::RunConfig, seed::DstSeed, - targets::descriptor::{DatastoreDescriptor, RelationalDbCommitlogDescriptor, TargetDescriptor}, - workload::table_ops::TableScenarioId, + targets::descriptor::{DatastoreDescriptor, RelationalDbCommitlogDescriptor, StandaloneHostDescriptor, TargetDescriptor}, + workload::{module_ops::HostScenarioId, table_ops::TableScenarioId}, }; #[derive(Parser, Debug)] @@ -45,6 +45,7 @@ struct RunArgs { enum TargetKind { Datastore, RelationalDbCommitlog, + StandaloneHost, } #[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] @@ -52,20 +53,10 @@ enum ScenarioKind { RandomCrud, IndexedRanges, Banking, + HostSmoke, } -impl From for TableScenarioId { - fn from(value: ScenarioKind) -> Self { - match value { - ScenarioKind::RandomCrud => TableScenarioId::RandomCrud, - ScenarioKind::IndexedRanges => TableScenarioId::IndexedRanges, - ScenarioKind::Banking => TableScenarioId::Banking, - } - } -} - -#[tokio::main] -async fn main() -> anyhow::Result<()> { +fn main() -> anyhow::Result<()> { init_tracing(); match Cli::parse().command { Command::Run(args) => run_command(args), @@ -88,11 +79,36 @@ fn init_tracing() { fn run_command(args: RunArgs) -> anyhow::Result<()> { let seed = resolve_seed(args.seed); let config = build_config(args.duration.as_deref(), args.max_interactions)?; - let scenario = TableScenarioId::from(args.target.scenario); match args.target.target { - TargetKind::Datastore => run_target::(seed, scenario, config), - TargetKind::RelationalDbCommitlog => 
run_target::(seed, scenario, config), + TargetKind::Datastore => { + let scenario = map_table_scenario(args.target.scenario)?; + run_target::(seed, scenario, config) + } + TargetKind::RelationalDbCommitlog => { + let scenario = map_table_scenario(args.target.scenario)?; + run_target::(seed, scenario, config) + } + TargetKind::StandaloneHost => { + let scenario = map_host_scenario(args.target.scenario)?; + run_target::(seed, scenario, config) + } + } +} + +fn map_table_scenario(scenario: ScenarioKind) -> anyhow::Result { + match scenario { + ScenarioKind::RandomCrud => Ok(TableScenarioId::RandomCrud), + ScenarioKind::IndexedRanges => Ok(TableScenarioId::IndexedRanges), + ScenarioKind::Banking => Ok(TableScenarioId::Banking), + ScenarioKind::HostSmoke => anyhow::bail!("scenario host-smoke is only valid for --target standalone-host"), + } +} + +fn map_host_scenario(scenario: ScenarioKind) -> anyhow::Result { + match scenario { + ScenarioKind::HostSmoke => Ok(HostScenarioId::HostSmoke), + _ => anyhow::bail!("target standalone-host only supports --scenario host-smoke"), } } @@ -118,9 +134,9 @@ fn build_config(duration: Option<&str>, max_interactions: Option) -> anyh } } -fn run_target>( +fn run_target( seed: DstSeed, - scenario: TableScenarioId, + scenario: D::Scenario, config: RunConfig, ) -> anyhow::Result<()> { let line = D::run_streaming(seed, scenario, config)?; diff --git a/crates/dst/src/schema.rs b/crates/dst/src/schema.rs index 1a3dd693bd3..43a69b557f8 100644 --- a/crates/dst/src/schema.rs +++ b/crates/dst/src/schema.rs @@ -1,19 +1,18 @@ //! Shared schema and row model used by DST targets. -use serde::{de::Deserializer, ser::Serializer, Deserialize, Serialize}; use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; use crate::seed::DstRng; /// Generated schema for one simulator case. 
-#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct SchemaPlan { /// User-visible tables installed before the workload starts. pub tables: Vec, } /// Table definition used by simulators. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct TablePlan { /// Stable logical table name used in generated interactions and assertions. pub name: String, @@ -27,7 +26,7 @@ pub struct TablePlan { } /// Column definition used by simulators. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct ColumnPlan { /// Column name installed into the target schema. pub name: String, @@ -42,22 +41,6 @@ pub struct SimRow { pub values: Vec, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -enum SerdeAlgebraicValue { - Bool(bool), - I8(i8), - U8(u8), - I16(i16), - U16(u16), - I32(i32), - U32(u32), - I64(i64), - U64(u64), - I128(i128), - U128(u128), - String(String), -} - pub fn generate_supported_type(rng: &mut DstRng) -> AlgebraicType { match rng.index(12) { 0 => AlgebraicType::Bool, @@ -99,67 +82,6 @@ pub fn generate_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) } } -impl From<&AlgebraicValue> for SerdeAlgebraicValue { - fn from(value: &AlgebraicValue) -> Self { - match value { - AlgebraicValue::Bool(value) => Self::Bool(*value), - AlgebraicValue::I8(value) => Self::I8(*value), - AlgebraicValue::U8(value) => Self::U8(*value), - AlgebraicValue::I16(value) => Self::I16(*value), - AlgebraicValue::U16(value) => Self::U16(*value), - AlgebraicValue::I32(value) => Self::I32(*value), - AlgebraicValue::U32(value) => Self::U32(*value), - AlgebraicValue::I64(value) => Self::I64(*value), - AlgebraicValue::U64(value) => Self::U64(*value), - AlgebraicValue::I128(value) => Self::I128(value.0), - AlgebraicValue::U128(value) => Self::U128(value.0), - AlgebraicValue::String(value) => 
Self::String(value.to_string()), - other => panic!("unsupported value in simulator row serde: {other:?}"), - } - } -} - -impl From for AlgebraicValue { - fn from(value: SerdeAlgebraicValue) -> Self { - match value { - SerdeAlgebraicValue::Bool(value) => Self::Bool(value), - SerdeAlgebraicValue::I8(value) => Self::I8(value), - SerdeAlgebraicValue::U8(value) => Self::U8(value), - SerdeAlgebraicValue::I16(value) => Self::I16(value), - SerdeAlgebraicValue::U16(value) => Self::U16(value), - SerdeAlgebraicValue::I32(value) => Self::I32(value), - SerdeAlgebraicValue::U32(value) => Self::U32(value), - SerdeAlgebraicValue::I64(value) => Self::I64(value), - SerdeAlgebraicValue::U64(value) => Self::U64(value), - SerdeAlgebraicValue::I128(value) => Self::I128(value.into()), - SerdeAlgebraicValue::U128(value) => Self::U128(value.into()), - SerdeAlgebraicValue::String(value) => Self::String(value.into()), - } - } -} - -impl Serialize for SimRow { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let values = self.values.iter().map(SerdeAlgebraicValue::from).collect::>(); - values.serialize(serializer) - } -} - -impl<'de> Deserialize<'de> for SimRow { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let values = Vec::::deserialize(deserializer)? - .into_iter() - .map(AlgebraicValue::from) - .collect(); - Ok(Self { values }) - } -} impl SimRow { pub fn to_product_value(&self) -> ProductValue { diff --git a/crates/dst/src/seed.rs b/crates/dst/src/seed.rs index 75ac1e0c32d..669bb125dd3 100644 --- a/crates/dst/src/seed.rs +++ b/crates/dst/src/seed.rs @@ -4,10 +4,8 @@ //! `DstSeed::fork` is used to derive independent substreams without requiring //! callers to manually coordinate RNG state. -use serde::{Deserialize, Serialize}; - /// Top-level seed value for a deterministic run. 
-#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] pub struct DstSeed(pub u64); impl DstSeed { diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index 19d1eb6a617..5286ecb1afa 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -1,6 +1,10 @@ //! Target descriptor layer used by the CLI. -use crate::{config::RunConfig, seed::DstSeed, workload::table_ops::TableScenarioId}; +use crate::{ + config::RunConfig, + seed::DstSeed, + workload::{module_ops::HostScenarioId, table_ops::TableScenarioId}, +}; /// Descriptor contract: CLI talks to this, not per-target ad hoc handlers. pub trait TargetDescriptor { @@ -47,3 +51,25 @@ impl TargetDescriptor for RelationalDbCommitlogDescriptor { )) } } + +pub struct StandaloneHostDescriptor; + +impl TargetDescriptor for StandaloneHostDescriptor { + const NAME: &'static str = "standalone_host"; + type Scenario = HostScenarioId; + + fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> anyhow::Result { + let outcome = crate::targets::standalone_host::run_generated_with_config_and_scenario(seed, scenario, config)?; + Ok(format!( + "ok target={} seed={} steps={} reducer_calls={} waits={} reopens={} noops={} expected_errors={}", + Self::NAME, + seed.0, + outcome.steps_executed, + outcome.reducer_calls, + outcome.scheduler_waits, + outcome.reopens, + outcome.noops, + outcome.expected_errors + )) + } +} diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index ed5386b4fc9..81143533e36 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -5,3 +5,4 @@ pub mod descriptor; pub(crate) mod harness; pub(crate) mod properties; pub mod relational_db_commitlog; +pub mod standalone_host; diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 
1cfa45ac9cc..3c2461171b1 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -707,8 +707,7 @@ impl RelationalDbEngine { Ok(RelationalDbCommitlogOutcome { applied_steps: self.step, durable_commit_count, - //TODO: remove 10 - replay_table_count: 10, + replay_table_count: self.last_durable_snapshot.len(), }) } diff --git a/crates/dst/src/targets/standalone_host.rs b/crates/dst/src/targets/standalone_host.rs new file mode 100644 index 00000000000..77ddbc19f9f --- /dev/null +++ b/crates/dst/src/targets/standalone_host.rs @@ -0,0 +1,367 @@ +//! Standalone host DST target (single scenario, no migration/subscriptions). + +use std::{ + path::PathBuf, + sync::{Arc, OnceLock}, + time::{Instant, SystemTime, UNIX_EPOCH}, +}; + +use bytes::Bytes; +use spacetimedb_client_api::{ + auth::SpacetimeAuth, + routes::subscribe::{generate_random_connection_id, WebSocketOptions}, + ControlStateReadAccess, ControlStateWriteAccess, NodeDelegate, +}; +use spacetimedb_client_api_messages::websocket::v1 as ws_v1; +use spacetimedb_core::{ + client::{ClientActorId, ClientConfig, ClientConnection}, + config::CertificateAuthority, + db::{Config as DbConfig, Storage}, + host::FunctionArgs, + messages::control_db::HostType, + util::jobs::JobCores, +}; +use spacetimedb_lib::Identity; +use spacetimedb_paths::{RootDir, SpacetimePaths}; +use spacetimedb_sats::ProductValue; +use spacetimedb_schema::{auto_migrate::MigrationPolicy, def::FunctionVisibility}; +use spacetimedb_standalone::{StandaloneEnv, StandaloneOptions}; +use tracing::trace; + +use crate::{ + config::RunConfig, + core::NextInteractionSource, + seed::DstSeed, + workload::module_ops::{HostScenarioId, ModuleInteraction, ModuleReducerSpec, ModuleWorkloadOutcome, NextInteractionGenerator}, +}; + +pub type StandaloneHostOutcome = ModuleWorkloadOutcome; + +pub fn run_generated_with_config_and_scenario( + seed: DstSeed, + scenario: HostScenarioId, + config: RunConfig, +) -> 
anyhow::Result { + run_with_madsim_determinism(seed, scenario, config) +} + +fn run_with_madsim_determinism( + seed: DstSeed, + scenario: HostScenarioId, + config: RunConfig, +) -> anyhow::Result { + // Compile and cache module bytes before entering the deterministic replay. + // Module compilation may use host system threads, so do it outside the run. + let _ = compiled_module()?; + let (first_outcome, first_trace) = run_once_in_madsim_runtime(seed, scenario, config.clone())?; + let (second_outcome, second_trace) = run_once_in_madsim_runtime(seed, scenario, config)?; + if first_trace != second_trace { + anyhow::bail!("madsim deterministic replay mismatch: interaction trace differs"); + } + if first_outcome != second_outcome { + anyhow::bail!("madsim deterministic replay mismatch: outcome differs"); + } + Ok(first_outcome) +} + +fn run_once_in_madsim_runtime( + seed: DstSeed, + scenario: HostScenarioId, + config: RunConfig, +) -> anyhow::Result<(StandaloneHostOutcome, Vec)> { + let mut runtime = madsim::runtime::Runtime::with_seed_and_config(seed.0, madsim::Config::default()); + runtime.set_allow_system_thread(true); + runtime.block_on(run_once_async(seed, scenario, config)) +} + + +async fn run_once_async( + seed: DstSeed, + scenario: HostScenarioId, + config: RunConfig, +) -> anyhow::Result<(StandaloneHostOutcome, Vec)> { + let module = compiled_module()?; + let reducers = extract_reducer_specs(module.clone()).await?; + let mut generator = NextInteractionGenerator::new( + seed, + scenario, + reducers.clone(), + config.max_interactions_or_default(usize::MAX), + ); + let mut engine = StandaloneHostEngine::new(seed, module).await?; + let deadline = config.deadline(); + let mut trace_log = Vec::new(); + + loop { + if deadline.is_some_and(|deadline| Instant::now() >= deadline) { + generator.request_finish(); + } + let Some(interaction) = generator.next_interaction() else { + break; + }; + trace!(?interaction, "standalone_host interaction"); + engine + 
.execute(&interaction) + .await + .map_err(|e| anyhow::anyhow!("interaction failed: {e}"))?; + trace_log.push(interaction); + } + + // Replay contract: same seed/scenario/config must produce same interaction sequence. + let mut replay = NextInteractionGenerator::new( + seed, + scenario, + reducers, + config.max_interactions_or_default(usize::MAX), + ); + let replayed = (0..trace_log.len()) + .filter_map(|_| replay.next_interaction()) + .collect::>(); + if replayed != trace_log { + anyhow::bail!("interaction sequence replay mismatch"); + } + + Ok((engine.finish(), trace_log)) +} + +#[derive(Clone)] +struct CompiledModuleInfo { + program_bytes: Bytes, + host_type: HostType, +} + +fn compiled_module() -> anyhow::Result> { + static CACHE: OnceLock> = OnceLock::new(); + if let Some(cached) = CACHE.get() { + return Ok(cached.clone()); + } + let module_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../modules/module-test"); + let (path, host_type) = spacetimedb_cli::build(&module_root, Some(PathBuf::from("src")).as_deref(), true, None)?; + let host_type: HostType = host_type.parse()?; + let program_bytes = std::fs::read(path)?; + let compiled = Arc::new(CompiledModuleInfo { + program_bytes: program_bytes.into(), + host_type, + }); + let _ = CACHE.set(compiled.clone()); + Ok(CACHE.get().expect("cache set or raced").clone()) +} + +async fn extract_reducer_specs(module: Arc) -> anyhow::Result> { + let module_def = + spacetimedb_core::host::extract_schema(module.program_bytes.clone().to_vec().into_boxed_slice(), module.host_type) + .await?; + Ok(module_def + .reducers() + .filter(|reducer| reducer.visibility == FunctionVisibility::ClientCallable) + .map(|reducer| ModuleReducerSpec { + name: reducer.name.to_string(), + params: reducer + .params + .elements + .iter() + .map(|arg| arg.algebraic_type.clone()) + .collect::>(), + }) + .collect::>()) +} + +struct HostSession { + _env: Arc, + client: ClientConnection, + db_identity: Identity, +} + +struct 
StandaloneHostEngine { + root_dir: RootDir, + session: Option, + module: Arc, + step: usize, + reducer_calls: usize, + scheduler_waits: usize, + reopens: usize, + noops: usize, + expected_errors: usize, +} + +impl StandaloneHostEngine { + async fn new(seed: DstSeed, module: Arc) -> anyhow::Result { + let root_dir = RootDir(std::env::temp_dir().join(format!( + "spacetimedb-dst-standalone-host-{}-{}-{}", + seed.0, + std::process::id(), + SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos() + ))); + let _ = std::fs::remove_dir_all(&root_dir); + let session = open_session(&root_dir, &module, None).await.map_err(anyhow::Error::msg)?; + Ok(Self { + root_dir, + session: Some(session), + module, + step: 0, + reducer_calls: 0, + scheduler_waits: 0, + reopens: 0, + noops: 0, + expected_errors: 0, + }) + } + + async fn execute(&mut self, interaction: &ModuleInteraction) -> Result<(), String> { + self.step = self.step.saturating_add(1); + match interaction { + ModuleInteraction::CallReducer { reducer, args } => { + self.reducer_calls = self.reducer_calls.saturating_add(1); + let request_id = (self.step as u32).saturating_sub(1); + let product = ProductValue::from_iter(args.iter().cloned()); + let payload = spacetimedb_sats::bsatn::to_vec(&product).map_err(|e| e.to_string())?; + let res = self + .session + .as_mut() + .ok_or_else(|| "host session missing".to_string())? 
+ .client + .call_reducer( + reducer, + FunctionArgs::Bsatn(payload.into()), + request_id, + Instant::now(), + ws_v1::CallReducerFlags::FullUpdate, + ) + .await; + match res { + Ok(_) => Ok(()), + Err(err) => { + let msg = err.to_string(); + if is_expected_error(reducer, &msg) { + self.expected_errors = self.expected_errors.saturating_add(1); + Ok(()) + } else { + Err(format!("unexpected reducer error reducer={reducer}: {msg}")) + } + } + } + } + ModuleInteraction::WaitScheduled { millis } => { + self.scheduler_waits = self.scheduler_waits.saturating_add(1); + tokio::time::sleep(std::time::Duration::from_millis(*millis)).await; + Ok(()) + } + ModuleInteraction::CloseReopen => { + self.reopens = self.reopens.saturating_add(1); + let db_identity = self + .session + .as_ref() + .ok_or_else(|| "host session missing".to_string())? + .db_identity; + let old = self.session.take(); + drop(old); + self.session = Some(open_session(&self.root_dir, &self.module, Some(db_identity)).await?); + Ok(()) + } + ModuleInteraction::NoOp => { + self.noops = self.noops.saturating_add(1); + Ok(()) + } + } + } + + fn finish(self) -> StandaloneHostOutcome { + StandaloneHostOutcome { + steps_executed: self.step, + reducer_calls: self.reducer_calls, + scheduler_waits: self.scheduler_waits, + reopens: self.reopens, + noops: self.noops, + expected_errors: self.expected_errors, + } + } +} + +fn is_expected_error(_reducer: &str, msg: &str) -> bool { + msg.contains("permission denied") +} + +async fn open_session( + root_dir: &RootDir, + module: &CompiledModuleInfo, + maybe_db_identity: Option, +) -> Result { + let paths = SpacetimePaths::from_root_dir(root_dir); + let certs = CertificateAuthority::in_cli_config_dir(&paths.cli_config_dir); + let env = StandaloneEnv::init( + StandaloneOptions { + db_config: DbConfig { + storage: Storage::Disk, + page_pool_max_size: None, + }, + websocket: WebSocketOptions::default(), + v8_heap_policy: Default::default(), + }, + &certs, + paths.data_dir.into(), + 
JobCores::without_pinned_cores(), + ) + .await + .map_err(|e| format!("standalone init failed: {e:#}"))?; + + let caller_identity = Identity::ZERO; + let db_identity = match maybe_db_identity { + Some(identity) => identity, + None => SpacetimeAuth::alloc(&env) + .await + .map_err(|e| format!("db identity allocation failed: {e:#?}"))? + .claims + .identity, + }; + + if env + .get_database_by_identity(&db_identity) + .await + .map_err(|e| format!("database lookup failed: {e:#}"))? + .is_none() + { + env.publish_database( + &caller_identity, + spacetimedb_client_api::DatabaseDef { + database_identity: db_identity, + program_bytes: module.program_bytes.clone(), + num_replicas: None, + host_type: module.host_type, + parent: None, + organization: None, + }, + MigrationPolicy::Compatible, + ) + .await + .map_err(|e| format!("publish module failed: {e:#}"))?; + } + + let database = env + .get_database_by_identity(&db_identity) + .await + .map_err(|e| format!("database lookup after publish failed: {e:#}"))? 
+ .ok_or_else(|| "database not found after publish".to_string())?; + let replica = env + .get_leader_replica_by_database(database.id) + .await + .ok_or_else(|| "leader replica not found".to_string())?; + let host = env + .leader(database.id) + .await + .map_err(|e| format!("leader host unavailable: {e:#}"))?; + let module_rx = host + .module_watcher() + .await + .map_err(|e| format!("module watcher failed: {e:#}"))?; + let client_id = ClientActorId { + identity: caller_identity, + connection_id: generate_random_connection_id(), + name: env.client_actor_index().next_client_name(), + }; + let client = ClientConnection::dummy(client_id, ClientConfig::for_test(), replica.id, module_rx); + Ok(HostSession { + _env: env, + client, + db_identity, + }) +} diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs index cb50bedef38..6f3378f67fb 100644 --- a/crates/dst/src/workload/commitlog_ops/types.rs +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -1,11 +1,9 @@ //! Serializable interaction model for relational-db + commitlog DST. -use serde::{Deserialize, Serialize}; - use crate::workload::table_ops::TableWorkloadInteraction; /// One interaction in the commitlog-oriented mixed workload. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Eq, PartialEq)] pub enum CommitlogInteraction { /// Reused base workload interaction from `table_ops`. Table(TableWorkloadInteraction), @@ -22,7 +20,7 @@ pub enum CommitlogInteraction { } /// Successful run summary for commitlog target. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct CommitlogWorkloadOutcome { pub applied_steps: usize, pub durable_commit_count: usize, diff --git a/crates/dst/src/workload/mod.rs b/crates/dst/src/workload/mod.rs index 52482e737f1..ab6eb8c0b17 100644 --- a/crates/dst/src/workload/mod.rs +++ b/crates/dst/src/workload/mod.rs @@ -1,5 +1,6 @@ //! 
Shared workload generators reused by multiple DST targets. pub mod commitlog_ops; +pub mod module_ops; pub(crate) mod strategy; pub mod table_ops; diff --git a/crates/dst/src/workload/module_ops/generation.rs b/crates/dst/src/workload/module_ops/generation.rs new file mode 100644 index 00000000000..22898c8be84 --- /dev/null +++ b/crates/dst/src/workload/module_ops/generation.rs @@ -0,0 +1,119 @@ +use crate::{ + core::NextInteractionSource, + schema::generate_value_for_type, + seed::{DstRng, DstSeed}, + workload::strategy::{Index, Strategy, Weighted}, +}; + +use super::{HostScenarioId, ModuleInteraction, ModuleReducerSpec}; + +const MAX_REGEN_ATTEMPTS: usize = 16; + +#[derive(Clone, Copy, Debug)] +enum ActionKind { + Reducer, + Wait, + Reopen, +} + +/// Deterministic stream generator for standalone-host interactions. +pub(crate) struct NextInteractionGenerator { + scenario: HostScenarioId, + reducers: Vec, + rng: DstRng, + target_interactions: usize, + emitted: usize, +} + +impl NextInteractionGenerator { + pub fn new(seed: DstSeed, scenario: HostScenarioId, reducers: Vec, target_interactions: usize) -> Self { + Self { + scenario, + reducers, + rng: seed.fork(300).rng(), + target_interactions, + emitted: 0, + } + } + + pub fn request_finish(&mut self) { + self.target_interactions = self.emitted; + } + + fn choose_action(&mut self) -> ActionKind { + match self.scenario { + HostScenarioId::HostSmoke => { + Weighted::new(vec![(85, ActionKind::Reducer), (10, ActionKind::Wait), (5, ActionKind::Reopen)]) + .sample(&mut self.rng) + } + } + } + + fn generate_reducer_interaction(&mut self) -> Option { + if self.reducers.is_empty() { + return None; + } + let idx = Index::new(self.reducers.len()).sample(&mut self.rng); + let spec = &self.reducers[idx]; + let mut args = Vec::with_capacity(spec.params.len()); + for (arg_index, ty) in spec.params.iter().enumerate() { + if !supports_generation(ty) { + return None; + } + args.push(generate_value_for_type(&mut self.rng, ty, 
arg_index)); + } + Some(ModuleInteraction::CallReducer { + reducer: spec.name.clone(), + args, + }) + } + + fn generate_next(&mut self) -> ModuleInteraction { + for _ in 0..MAX_REGEN_ATTEMPTS { + let next = match self.choose_action() { + ActionKind::Reducer => self.generate_reducer_interaction(), + ActionKind::Wait => Some(ModuleInteraction::WaitScheduled { millis: 1_200 }), + ActionKind::Reopen => Some(ModuleInteraction::CloseReopen), + }; + if let Some(next) = next { + return next; + } + } + ModuleInteraction::NoOp + } +} + +fn supports_generation(ty: &spacetimedb_sats::AlgebraicType) -> bool { + use spacetimedb_sats::AlgebraicType; + matches!( + ty, + AlgebraicType::Bool + | AlgebraicType::I8 + | AlgebraicType::U8 + | AlgebraicType::I16 + | AlgebraicType::U16 + | AlgebraicType::I32 + | AlgebraicType::U32 + | AlgebraicType::I64 + | AlgebraicType::U64 + | AlgebraicType::I128 + | AlgebraicType::U128 + | AlgebraicType::String + ) +} + +impl NextInteractionSource for NextInteractionGenerator { + type Interaction = ModuleInteraction; + + fn next_interaction(&mut self) -> Option { + if self.emitted >= self.target_interactions { + return None; + } + self.emitted += 1; + Some(self.generate_next()) + } + + fn request_finish(&mut self) { + Self::request_finish(self); + } +} diff --git a/crates/dst/src/workload/module_ops/mod.rs b/crates/dst/src/workload/module_ops/mod.rs new file mode 100644 index 00000000000..91d943d562e --- /dev/null +++ b/crates/dst/src/workload/module_ops/mod.rs @@ -0,0 +1,7 @@ +//! Workload for standalone host/module testing. 
+ +mod generation; +mod types; + +pub(crate) use generation::NextInteractionGenerator; +pub use types::{HostScenarioId, ModuleInteraction, ModuleReducerSpec, ModuleWorkloadOutcome}; diff --git a/crates/dst/src/workload/module_ops/types.rs b/crates/dst/src/workload/module_ops/types.rs new file mode 100644 index 00000000000..9d57f185c1e --- /dev/null +++ b/crates/dst/src/workload/module_ops/types.rs @@ -0,0 +1,40 @@ +use spacetimedb_sats::AlgebraicType; + +/// Single v1 scenario for standalone host target. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub enum HostScenarioId { + #[default] + HostSmoke, +} + +/// Reducer metadata used by the typed argument generator. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ModuleReducerSpec { + pub name: String, + pub params: Vec, +} + +/// One standalone-host interaction. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ModuleInteraction { + CallReducer { + reducer: String, + args: Vec, + }, + WaitScheduled { + millis: u64, + }, + CloseReopen, + NoOp, +} + +/// Run summary for standalone-host target. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ModuleWorkloadOutcome { + pub steps_executed: usize, + pub reducer_calls: usize, + pub scheduler_waits: usize, + pub reopens: usize, + pub noops: usize, + pub expected_errors: usize, +} diff --git a/crates/dst/src/workload/table_ops/scenarios/mod.rs b/crates/dst/src/workload/table_ops/scenarios/mod.rs index 9ac7cab4f12..e6a95cc1c4c 100644 --- a/crates/dst/src/workload/table_ops/scenarios/mod.rs +++ b/crates/dst/src/workload/table_ops/scenarios/mod.rs @@ -1,8 +1,6 @@ mod banking; mod random_crud; -use serde::{Deserialize, Serialize}; - use crate::{schema::SchemaPlan, seed::DstRng}; use super::{generation::ScenarioPlanner, TableScenario, TableWorkloadOutcome}; @@ -16,7 +14,7 @@ pub(crate) struct IndexedRangesScenario; #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub(crate) struct BankingScenario; -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub enum TableScenarioId { #[default] RandomCrud, diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index 3bc568b2bf2..d27d17bca5c 100644 --- a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -1,5 +1,3 @@ -use serde::{Deserialize, Serialize}; - use crate::{ core::TargetEngine, schema::{SchemaPlan, SimRow}, @@ -19,7 +17,7 @@ pub(crate) trait TableScenario: Clone { } /// One generated workload step. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Eq, PartialEq)] pub enum TableWorkloadInteraction { BeginTx { conn: usize }, CommitTx { conn: usize }, @@ -29,7 +27,7 @@ pub enum TableWorkloadInteraction { } /// Final state gathered from a table-workload engine after execution ends. 
-#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct TableWorkloadOutcome { /// Row count for each table in schema order. pub final_row_counts: Vec, diff --git a/crates/standalone/Cargo.toml b/crates/standalone/Cargo.toml index 0ce65a57ed0..704c033cf26 100644 --- a/crates/standalone/Cargo.toml +++ b/crates/standalone/Cargo.toml @@ -55,7 +55,7 @@ serde_json.workspace = true sled.workspace = true socket2.workspace = true thiserror.workspace = true -tokio.workspace = true +tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } tower-http.workspace = true toml.workspace = true tracing = { workspace = true, features = ["release_max_level_debug"] } diff --git a/crates/standalone/src/subcommands/start.rs b/crates/standalone/src/subcommands/start.rs index 42a04d1e2e6..80150c497c0 100644 --- a/crates/standalone/src/subcommands/start.rs +++ b/crates/standalone/src/subcommands/start.rs @@ -1,5 +1,6 @@ use netstat2::{get_sockets_info, AddressFamilyFlags, ProtocolFlags, ProtocolSocketInfo, TcpState}; use spacetimedb_client_api::routes::identity::IdentityRoutes; +#[cfg(not(madsim))] use spacetimedb_pg::pg_server; use std::io::{self, Write}; use std::net::IpAddr; @@ -20,6 +21,7 @@ use spacetimedb_client_api::routes::router; use spacetimedb_client_api::routes::subscribe::WebSocketOptions; use spacetimedb_paths::cli::{PrivKeyPath, PubKeyPath}; use spacetimedb_paths::server::{ConfigToml, ServerDataDir}; +#[cfg(not(madsim))] use tokio::net::TcpListener; pub fn cli() -> clap::Command { @@ -197,13 +199,26 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { ); worker_metrics::spawn_page_pool_stats(listen_addr.clone(), ctx.page_pool().clone()); worker_metrics::spawn_bsatn_rlb_pool_stats(listen_addr.clone(), ctx.bsatn_rlb_pool().clone()); + #[cfg(madsim)] + { + let _ = (pg_port, ctx, listen_addr); + anyhow::bail!("standalone start server mode is not 
supported under madsim"); + } + + #[cfg(not(madsim))] let mut db_routes = DatabaseRoutes::default(); - db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); - db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); - db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); + #[cfg(not(madsim))] + { + db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); + db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); + db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); + } + #[cfg(not(madsim))] let extra = axum::Router::new().nest("/health", spacetimedb_client_api::routes::health::router()); + #[cfg(not(madsim))] let service = router(&ctx, db_routes, IdentityRoutes::default(), extra).with_state(ctx.clone()); + #[cfg(not(madsim))] // Check if the requested port is available on both IPv4 and IPv6. // If not, offer to find an available port by incrementing (unless non-interactive). 
let listen_addr = if let Some((host, port_str)) = listen_addr.rsplit_once(':') { @@ -249,38 +264,41 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { listen_addr.to_string() }; - let tcp = TcpListener::bind(&listen_addr).await.context(format!( - "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" - ))?; - socket2::SockRef::from(&tcp).set_nodelay(true)?; - log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); - - if let Some(pg_port) = pg_port { - let server_addr = listen_addr.split(':').next().unwrap(); - let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( - "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" + #[cfg(not(madsim))] + { + let tcp = TcpListener::bind(&listen_addr).await.context(format!( + "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" ))?; - - let notify = Arc::new(tokio::sync::Notify::new()); - let shutdown_notify = notify.clone(); - tokio::select! 
{ - _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, - _ = axum::serve(tcp, service).with_graceful_shutdown(async move { - shutdown_notify.notified().await; - }) => {}, - _ = tokio::signal::ctrl_c() => { - println!("Shutting down servers..."); - notify.notify_waiters(); // Notify all tasks + socket2::SockRef::from(&tcp).set_nodelay(true)?; + log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); + + if let Some(pg_port) = pg_port { + let server_addr = listen_addr.split(':').next().unwrap(); + let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( + "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" + ))?; + + let notify = Arc::new(tokio::sync::Notify::new()); + let shutdown_notify = notify.clone(); + tokio::select! { + _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, + _ = axum::serve(tcp, service).with_graceful_shutdown(async move { + shutdown_notify.notified().await; + }) => {}, + _ = tokio::signal::ctrl_c() => { + println!("Shutting down servers..."); + notify.notify_waiters(); // Notify all tasks + } } + } else { + log::warn!("PostgreSQL wire protocol server disabled"); + axum::serve(tcp, service) + .with_graceful_shutdown(async { + tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); + log::info!("Shutting down server..."); + }) + .await?; } - } else { - log::warn!("PostgreSQL wire protocol server disabled"); - axum::serve(tcp, service) - .with_graceful_shutdown(async { - tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); - log::info!("Shutting down server..."); - }) - .await?; } Ok(()) diff --git a/run_dst.sh b/run_dst.sh new file mode 100755 index 00000000000..6dd6bee074d --- /dev/null +++ b/run_dst.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -euo pipefail + +cd "$(dirname "$0")" + +export RUSTFLAGS="${RUSTFLAGS:+$RUSTFLAGS }--cfg madsim" +exec cargo 
run -p spacetimedb-dst -- "$@" From 19badab6920653e8973bbc81ac5cb76c77dd85b2 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 29 Apr 2026 15:07:02 +0530 Subject: [PATCH 18/74] cleanups --- Cargo.lock | 2 + crates/dst/Cargo.toml | 4 + crates/dst/README.md | 6 +- crates/dst/src/core/mod.rs | 22 +- crates/dst/src/lib.rs | 2 +- crates/dst/src/main.rs | 49 ++- crates/dst/src/schema.rs | 1 - crates/dst/src/targets/datastore.rs | 407 ------------------ crates/dst/src/targets/descriptor.rs | 84 ++-- crates/dst/src/targets/harness.rs | 20 - crates/dst/src/targets/mod.rs | 2 - crates/dst/src/targets/properties.rs | 129 +++++- .../src/targets/relational_db_commitlog.rs | 132 +++--- crates/dst/src/targets/standalone_host.rs | 72 ++-- .../dst/src/workload/commitlog_ops/types.rs | 3 +- .../dst/src/workload/module_ops/generation.rs | 17 +- crates/dst/src/workload/table_ops/mod.rs | 7 +- crates/dst/src/workload/table_ops/model.rs | 4 +- crates/dst/src/workload/table_ops/runner.rs | 75 ---- crates/dst/src/workload/table_ops/types.rs | 28 -- crates/dst/tests/madsim_axum_reqwest.rs | 36 ++ 21 files changed, 350 insertions(+), 752 deletions(-) delete mode 100644 crates/dst/src/targets/datastore.rs delete mode 100644 crates/dst/src/targets/harness.rs delete mode 100644 crates/dst/src/workload/table_ops/runner.rs create mode 100644 crates/dst/tests/madsim_axum_reqwest.rs diff --git a/Cargo.lock b/Cargo.lock index 33b31ec528c..4e88c4d1e8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8290,10 +8290,12 @@ name = "spacetimedb-dst" version = "2.1.0" dependencies = [ "anyhow", + "axum", "bytes", "clap 4.5.50", "madsim", "madsim-tokio", + "reqwest 0.12.24", "spacetimedb-cli", "spacetimedb-client-api", "spacetimedb-client-api-messages", diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index 093c2fa6d64..e9cf4aab9c4 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -37,3 +37,7 @@ spacetimedb-table.workspace = true tracing.workspace = true 
tracing-subscriber.workspace = true madsim = { path = "../../../../madsim/madsim" } + +[dev-dependencies] +axum.workspace = true +reqwest.workspace = true diff --git a/crates/dst/README.md b/crates/dst/README.md index 8ed57dd7c80..28974746cdf 100644 --- a/crates/dst/README.md +++ b/crates/dst/README.md @@ -17,8 +17,8 @@ generated and executed as a deterministic stream. ## Current Targets -- `datastore` - `relational-db-commitlog` +- `standalone-host` Both targets reuse shared workload families and share the same streaming runner. @@ -48,8 +48,8 @@ Examples: ## CLI ```bash -cargo run -p spacetimedb-dst -- run --target datastore --scenario banking --duration 5m -cargo run -p spacetimedb-dst -- run --target datastore --scenario indexed-ranges --duration 5m +cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario banking --duration 5m +cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario indexed-ranges --duration 5m cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --seed 42 --max-interactions 2000 ``` diff --git a/crates/dst/src/core/mod.rs b/crates/dst/src/core/mod.rs index 177de8b6d4f..e03b340add8 100644 --- a/crates/dst/src/core/mod.rs +++ b/crates/dst/src/core/mod.rs @@ -30,26 +30,12 @@ pub trait TargetEngine { fn collect_outcome(&mut self) -> anyhow::Result; } -/// Target-owned property lifecycle hooks. -pub trait PropertySet { - type Error; - - fn on_interaction(&mut self, interaction: &I, step: usize) -> Result<(), Self::Error>; - fn on_finish(&mut self, outcome: &O) -> Result<(), Self::Error>; -} - /// Shared streaming runner. 
-pub fn run_streaming( - mut source: S, - mut engine: E, - mut properties: P, - cfg: RunConfig, -) -> anyhow::Result +pub fn run_streaming(mut source: S, mut engine: E, cfg: RunConfig) -> anyhow::Result where I: Clone, S: NextInteractionSource, E: TargetEngine, - P: PropertySet, { let deadline = cfg.deadline(); let mut step = 0usize; @@ -63,15 +49,9 @@ where engine .execute_interaction(&interaction) .map_err(|e| anyhow::anyhow!("interaction execution failed at step {step}: {e}"))?; - properties - .on_interaction(&interaction, step) - .map_err(|e| anyhow::anyhow!("property failed at step {step}: {e}"))?; step = step.saturating_add(1); } engine.finish(); let outcome = engine.collect_outcome()?; - properties - .on_finish(&outcome) - .map_err(|e| anyhow::anyhow!("finish property failed: {e}"))?; Ok(outcome) } diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index 6b481361b11..dcfe5e91c06 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -5,7 +5,7 @@ //! - [`config`] for run budgets, //! - [`seed`] for deterministic seeds, //! - [`workload`] for scenario identifiers, -//! - [`targets`] for the executable datastore / relational-db adapters. +//! - [`targets`] for executable relational-db / standalone-host adapters. /// Shared run-budget configuration for DST targets. 
pub mod config; diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index 4c8c446796a..e7a7961b11b 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -1,10 +1,13 @@ -use std::time::{SystemTime, UNIX_EPOCH}; +use std::{ + future::Future, + time::{SystemTime, UNIX_EPOCH}, +}; use clap::{Args, Parser, Subcommand, ValueEnum}; use spacetimedb_dst::{ config::RunConfig, seed::DstSeed, - targets::descriptor::{DatastoreDescriptor, RelationalDbCommitlogDescriptor, StandaloneHostDescriptor, TargetDescriptor}, + targets::descriptor::{RelationalDbCommitlogDescriptor, StandaloneHostDescriptor, TargetDescriptor}, workload::{module_ops::HostScenarioId, table_ops::TableScenarioId}, }; @@ -23,7 +26,7 @@ enum Command { #[derive(Args, Debug, Clone)] struct TargetArgs { - #[arg(long, value_enum, default_value_t = TargetKind::Datastore)] + #[arg(long, value_enum, default_value_t = TargetKind::RelationalDbCommitlog)] target: TargetKind, #[arg(long, value_enum, default_value_t = ScenarioKind::RandomCrud)] scenario: ScenarioKind, @@ -43,7 +46,6 @@ struct RunArgs { #[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] enum TargetKind { - Datastore, RelationalDbCommitlog, StandaloneHost, } @@ -81,21 +83,44 @@ fn run_command(args: RunArgs) -> anyhow::Result<()> { let config = build_config(args.duration.as_deref(), args.max_interactions)?; match args.target.target { - TargetKind::Datastore => { - let scenario = map_table_scenario(args.target.scenario)?; - run_target::(seed, scenario, config) - } TargetKind::RelationalDbCommitlog => { let scenario = map_table_scenario(args.target.scenario)?; - run_target::(seed, scenario, config) + run_prepared_target::(seed, scenario, config) } TargetKind::StandaloneHost => { let scenario = map_host_scenario(args.target.scenario)?; - run_target::(seed, scenario, config) + run_prepared_target::(seed, scenario, config) } } } +fn run_prepared_target( + seed: DstSeed, + scenario: D::Scenario, + config: RunConfig, +) -> 
anyhow::Result<()> { + D::prepare(seed, &scenario, &config)?; + run_in_runtime(seed, run_target::(seed, scenario, config)) +} + +#[cfg(madsim)] +fn run_in_runtime(seed: DstSeed, future: F) -> anyhow::Result +where + F: Future>, +{ + let mut runtime = madsim::runtime::Runtime::with_seed_and_config(seed.0, madsim::Config::default()); + runtime.set_allow_system_thread(true); + runtime.block_on(future) +} + +#[cfg(not(madsim))] +fn run_in_runtime(_seed: DstSeed, future: F) -> anyhow::Result +where + F: Future>, +{ + tokio::runtime::Runtime::new()?.block_on(future) +} + fn map_table_scenario(scenario: ScenarioKind) -> anyhow::Result { match scenario { ScenarioKind::RandomCrud => Ok(TableScenarioId::RandomCrud), @@ -134,12 +159,12 @@ fn build_config(duration: Option<&str>, max_interactions: Option) -> anyh } } -fn run_target( +async fn run_target( seed: DstSeed, scenario: D::Scenario, config: RunConfig, ) -> anyhow::Result<()> { - let line = D::run_streaming(seed, scenario, config)?; + let line = D::run_streaming(seed, scenario, config).await?; println!("{line}"); Ok(()) } diff --git a/crates/dst/src/schema.rs b/crates/dst/src/schema.rs index 43a69b557f8..80349565828 100644 --- a/crates/dst/src/schema.rs +++ b/crates/dst/src/schema.rs @@ -82,7 +82,6 @@ pub fn generate_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) } } - impl SimRow { pub fn to_product_value(&self) -> ProductValue { ProductValue::from_iter(self.values.iter().cloned()) diff --git a/crates/dst/src/targets/datastore.rs b/crates/dst/src/targets/datastore.rs deleted file mode 100644 index 930d876641d..00000000000 --- a/crates/dst/src/targets/datastore.rs +++ /dev/null @@ -1,407 +0,0 @@ -//! Randomized datastore simulator target built on the shared table workload. 
- -use std::ops::Bound; - -use spacetimedb_datastore::{ - execution_context::Workload, - locking_tx_datastore::{datastore::Locking, MutTxId}, - traits::{IsolationLevel, MutTx, MutTxDatastore, Tx, TxDatastore}, -}; -use spacetimedb_execution::Datastore as _; -use spacetimedb_lib::{ - db::auth::{StAccess, StTableType}, - Identity, -}; -use spacetimedb_primitives::TableId; -use spacetimedb_sats::AlgebraicValue; -use spacetimedb_schema::{ - def::BTreeAlgorithm, - schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, - table_name::TableName, -}; -use spacetimedb_table::page_pool::PagePool; - -use crate::{ - config::RunConfig, - schema::{SchemaPlan, SimRow}, - seed::DstSeed, - targets::{ - harness::{self, TableTargetHarness}, - properties::{PropertyRuntime, TargetPropertyAccess}, - }, - workload::table_ops::{ - ConnectionWriteState, TableScenarioId, TableWorkloadEngine, TableWorkloadInteraction, TableWorkloadOutcome, - }, -}; - -pub type DatastoreSimulatorOutcome = TableWorkloadOutcome; -type Interaction = TableWorkloadInteraction; - -struct DatastoreTarget; - -impl TableTargetHarness for DatastoreTarget { - type Engine = DatastoreEngine; - - fn build_engine(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { - DatastoreEngine::new(schema, num_connections) - } -} - -pub fn run_generated_with_config_and_scenario( - seed: DstSeed, - scenario: TableScenarioId, - config: RunConfig, -) -> anyhow::Result { - harness::run_generated_with_config_and_scenario::(seed, scenario, config) -} - -/// Concrete datastore execution harness for the shared table workload. 
-struct DatastoreEngine { - schema: SchemaPlan, - datastore: Locking, - table_ids: Vec, - execution: ConnectionWriteState, - properties: PropertyRuntime, - step: u64, -} - -impl DatastoreEngine { - fn new(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { - let datastore = bootstrap_datastore()?; - let table_ids = install_schema(&datastore, schema)?; - Ok(Self { - schema: schema.clone(), - datastore, - table_ids, - execution: ConnectionWriteState::new(num_connections), - properties: PropertyRuntime::default(), - step: 0, - }) - } - - fn with_mut_tx( - &mut self, - conn: usize, - table: usize, - mut f: impl FnMut(&Locking, TableId, &mut MutTxId) -> Result, - ) -> Result { - let table_id = *self - .table_ids - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - self.execution.ensure_known_connection(conn)?; - let slot = &mut self.execution.tx_by_connection[conn]; - - match slot { - Some(tx) => f(&self.datastore, table_id, tx), - None => { - if let Some(owner) = self.execution.active_writer { - return Err(format!( - "connection {conn} cannot auto-commit write while connection {owner} owns lock" - )); - } - let mut tx = self - .datastore - .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); - self.execution.active_writer = Some(conn); - let value = f(&self.datastore, table_id, &mut tx)?; - self.datastore - .commit_mut_tx(tx) - .map_err(|err| format!("auto-commit failed on connection {conn}: {err}"))?; - self.execution.active_writer = None; - Ok(value) - } - } - } - - fn fresh_lookup(&self, table_id: TableId, id: u64) -> anyhow::Result> { - let tx = self.datastore.begin_tx(Workload::ForTests); - Ok(tx - .table_scan(table_id)? 
- .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .find(|row| row.id() == Some(id))) - } - - fn collect_rows_for_table(&self, table: usize) -> anyhow::Result> { - let table_id = *self - .table_ids - .get(table) - .ok_or_else(|| anyhow::anyhow!("table {table} out of range"))?; - let tx = self.datastore.begin_tx(Workload::ForTests); - let mut rows = tx - .table_scan(table_id)? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); - Ok(rows) - } - - fn fresh_range_scan( - &self, - table_id: TableId, - cols: &[u16], - lower: Bound, - upper: Bound, - ) -> anyhow::Result> { - let tx = self.datastore.begin_tx(Workload::ForTests); - let cols = cols.iter().copied().collect::(); - let rows = self - .datastore - .iter_by_col_range_tx(&tx, table_id, cols, (lower, upper))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect(); - Ok(rows) - } - - fn table_id(&self, table: usize) -> Result { - self.table_ids - .get(table) - .copied() - .ok_or_else(|| format!("table {table} out of range")) - } - - fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String> { - let table_id = self.table_id(table)?; - if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { - Ok(self - .datastore - .iter_by_col_eq_mut_tx(tx, table_id, 0u16, &AlgebraicValue::U64(id)) - .map_err(|err| format!("in-tx lookup failed: {err}"))? 
- .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .next()) - } else { - self.fresh_lookup(table_id, id) - .map_err(|err| format!("fresh lookup failed: {err}")) - } - } - - fn count_rows_for_property(&self, table: usize) -> Result { - let table_id = self.table_id(table)?; - let tx = self.datastore.begin_tx(Workload::ForTests); - Ok(tx.row_count(table_id) as usize) - } - - fn count_by_col_eq_for_property(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { - let table_id = self.table_id(table)?; - let tx = self.datastore.begin_tx(Workload::ForTests); - self.datastore - .iter_by_col_eq_tx(&tx, table_id, col, value) - .map(|rows| rows.count()) - .map_err(|err| format!("predicate query failed: {err}")) - } - - fn range_scan_for_property( - &self, - table: usize, - cols: &[u16], - lower: Bound, - upper: Bound, - ) -> Result, String> { - let table_id = self.table_id(table)?; - self.fresh_range_scan(table_id, cols, lower, upper) - .map_err(|err| format!("range scan failed: {err}")) - } - - fn with_property_runtime( - &mut self, - f: impl FnOnce(&mut PropertyRuntime, &Self) -> Result, - ) -> Result { - let mut runtime = std::mem::take(&mut self.properties); - let result = f(&mut runtime, self); - self.properties = runtime; - result - } -} - -impl TargetPropertyAccess for DatastoreEngine { - fn schema_plan(&self) -> &SchemaPlan { - &self.schema - } - - fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String> { - Self::lookup_in_connection(self, conn, table, id) - } - - fn collect_rows_for_table(&self, table: usize) -> Result, String> { - Self::collect_rows_for_table(self, table).map_err(|err| format!("collect rows failed: {err}")) - } - - fn count_rows(&self, table: usize) -> Result { - Self::count_rows_for_property(self, table) - } - - fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { - Self::count_by_col_eq_for_property(self, table, col, value) - } - - fn range_scan( - 
&self, - table: usize, - cols: &[u16], - lower: Bound, - upper: Bound, - ) -> Result, String> { - Self::range_scan_for_property(self, table, cols, lower, upper) - } -} - -impl TableWorkloadEngine for DatastoreEngine { - fn execute(&mut self, interaction: &Interaction) -> Result<(), String> { - self.step = self.step.saturating_add(1); - match interaction { - Interaction::BeginTx { conn } => { - self.execution.ensure_known_connection(*conn)?; - if self.execution.tx_by_connection[*conn].is_some() { - return Err(format!("connection {conn} already has open transaction")); - } - if let Some(owner) = self.execution.active_writer { - return Err(format!( - "connection {conn} cannot begin write transaction while connection {owner} owns lock" - )); - } - self.execution.tx_by_connection[*conn] = Some( - self.datastore - .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests), - ); - self.execution.active_writer = Some(*conn); - } - Interaction::CommitTx { conn } => { - self.execution.ensure_writer_owner(*conn, "commit")?; - let tx = self.execution.tx_by_connection[*conn] - .take() - .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; - self.datastore - .commit_mut_tx(tx) - .map_err(|err| format!("commit failed on connection {conn}: {err}"))?; - self.execution.active_writer = None; - self.with_property_runtime(|runtime, access| runtime.on_commit_or_rollback(access))?; - } - Interaction::RollbackTx { conn } => { - self.execution.ensure_writer_owner(*conn, "rollback")?; - let tx = self.execution.tx_by_connection[*conn] - .take() - .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; - let _ = self.datastore.rollback_mut_tx(tx); - self.execution.active_writer = None; - self.with_property_runtime(|runtime, access| runtime.on_commit_or_rollback(access))?; - } - Interaction::Insert { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[*conn].is_some(); - let inserted_row = self.with_mut_tx(*conn, *table, 
|datastore, table_id, tx| { - let bsatn = row.to_bsatn().map_err(|err: anyhow::Error| err.to_string())?; - let (_, row_ref, _) = datastore - .insert_mut_tx(tx, table_id, &bsatn) - .map_err(|err| format!("insert failed: {err}"))?; - Ok(SimRow::from_product_value(row_ref.to_product_value())) - })?; - let step = self.step; - self.with_property_runtime(|runtime, access| { - runtime.on_insert(access, step, *conn, *table, &inserted_row, in_tx) - })?; - } - Interaction::Delete { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[*conn].is_some(); - self.with_mut_tx(*conn, *table, |datastore, table_id, tx| { - let deleted = datastore.delete_by_rel_mut_tx(tx, table_id, [row.to_product_value()]); - if deleted != 1 { - return Err(format!("delete expected 1 row, got {deleted}")); - } - Ok(()) - })?; - let step = self.step; - self.with_property_runtime(|runtime, access| { - runtime.on_delete(access, step, *conn, *table, row, in_tx) - })?; - } - } - - Ok(()) - } - - fn collect_outcome(&mut self) -> anyhow::Result { - let tx = self.datastore.begin_tx(Workload::ForTests); - let mut final_rows = Vec::with_capacity(self.table_ids.len()); - let mut final_row_counts = Vec::with_capacity(self.table_ids.len()); - - for &table_id in &self.table_ids { - let mut rows = tx - .table_scan(table_id)? 
- .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); - final_row_counts.push(rows.len() as u64); - final_rows.push(rows); - } - - Ok(DatastoreSimulatorOutcome { - final_row_counts, - final_rows, - }) - } - - fn finish(&mut self) { - for tx in &mut self.execution.tx_by_connection { - if let Some(tx) = tx.take() { - let _ = self.datastore.rollback_mut_tx(tx); - } - } - self.execution.active_writer = None; - } -} - -fn bootstrap_datastore() -> spacetimedb_datastore::Result { - Locking::bootstrap(Identity::ZERO, PagePool::new_for_test()) -} - -fn install_schema(datastore: &Locking, schema: &SchemaPlan) -> anyhow::Result> { - let mut tx = datastore.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); - let mut table_ids = Vec::with_capacity(schema.tables.len()); - - for table in &schema.tables { - let columns = table - .columns - .iter() - .enumerate() - .map(|(idx, col)| ColumnSchema::for_test(idx as u16, &col.name, col.ty.clone())) - .collect::>(); - - let mut indexes = vec![IndexSchema::for_test( - format!("{}_id_idx", table.name), - BTreeAlgorithm::from(0), - )]; - for cols in &table.extra_indexes { - let cols_name = cols.iter().map(|col| format!("c{col}")).collect::>().join("_"); - indexes.push(IndexSchema::for_test( - format!("{}_{}_idx", table.name, cols_name), - BTreeAlgorithm::from(cols.iter().copied().collect::()), - )); - } - let constraints = vec![ConstraintSchema::unique_for_test( - format!("{}_id_unique", table.name), - 0, - )]; - - let table_id = datastore.create_table_mut_tx( - &mut tx, - TableSchema::new( - TableId::SENTINEL, - TableName::for_test(&table.name), - None, - columns, - indexes, - constraints, - vec![], - StTableType::User, - StAccess::Public, - None, - Some(0.into()), - false, - None, - ), - )?; - table_ids.push(table_id); - } - - datastore.commit_mut_tx(tx)?; - Ok(table_ids) -} diff --git a/crates/dst/src/targets/descriptor.rs 
b/crates/dst/src/targets/descriptor.rs index 5286ecb1afa..e32848204c2 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -1,5 +1,7 @@ //! Target descriptor layer used by the CLI. +use std::{future::Future, pin::Pin}; + use crate::{ config::RunConfig, seed::DstSeed, @@ -11,44 +13,35 @@ pub trait TargetDescriptor { const NAME: &'static str; type Scenario; - fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> anyhow::Result; -} - -pub struct DatastoreDescriptor; - -impl TargetDescriptor for DatastoreDescriptor { - const NAME: &'static str = "datastore"; - type Scenario = TableScenarioId; - - fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> anyhow::Result { - let outcome = crate::targets::datastore::run_generated_with_config_and_scenario(seed, scenario, config)?; - Ok(format!( - "ok target={} seed={} tables={} row_counts={:?}", - Self::NAME, - seed.0, - outcome.final_rows.len(), - outcome.final_row_counts - )) + fn prepare(_seed: DstSeed, _scenario: &Self::Scenario, _config: &RunConfig) -> anyhow::Result<()> { + Ok(()) } + + fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture; } +pub type TargetRunFuture = Pin>>>; + pub struct RelationalDbCommitlogDescriptor; impl TargetDescriptor for RelationalDbCommitlogDescriptor { const NAME: &'static str = "relational_db_commitlog"; type Scenario = TableScenarioId; - fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> anyhow::Result { - let outcome = - crate::targets::relational_db_commitlog::run_generated_with_config_and_scenario(seed, scenario, config)?; - Ok(format!( - "ok target={} seed={} steps={} durable_commits={} replay_tables={}", - Self::NAME, - seed.0, - outcome.applied_steps, - outcome.durable_commit_count, - outcome.replay_table_count - )) + fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture { + 
Box::pin(async move { + let outcome = crate::targets::relational_db_commitlog::run_generated_with_config_and_scenario( + seed, scenario, config, + )?; + Ok(format!( + "ok target={} seed={} steps={} durable_commits={} replay_tables={}", + Self::NAME, + seed.0, + outcome.applied_steps, + outcome.durable_commit_count, + outcome.replay_table_count + )) + }) } } @@ -58,18 +51,25 @@ impl TargetDescriptor for StandaloneHostDescriptor { const NAME: &'static str = "standalone_host"; type Scenario = HostScenarioId; - fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> anyhow::Result { - let outcome = crate::targets::standalone_host::run_generated_with_config_and_scenario(seed, scenario, config)?; - Ok(format!( - "ok target={} seed={} steps={} reducer_calls={} waits={} reopens={} noops={} expected_errors={}", - Self::NAME, - seed.0, - outcome.steps_executed, - outcome.reducer_calls, - outcome.scheduler_waits, - outcome.reopens, - outcome.noops, - outcome.expected_errors - )) + fn prepare(_seed: DstSeed, _scenario: &Self::Scenario, _config: &RunConfig) -> anyhow::Result<()> { + crate::targets::standalone_host::prepare_generated_run() + } + + fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture { + Box::pin(async move { + let outcome = + crate::targets::standalone_host::run_generated_with_config_and_scenario(seed, scenario, config).await?; + Ok(format!( + "ok target={} seed={} steps={} reducer_calls={} waits={} reopens={} noops={} expected_errors={}", + Self::NAME, + seed.0, + outcome.steps_executed, + outcome.reducer_calls, + outcome.scheduler_waits, + outcome.reopens, + outcome.noops, + outcome.expected_errors + )) + }) } } diff --git a/crates/dst/src/targets/harness.rs b/crates/dst/src/targets/harness.rs deleted file mode 100644 index 5cafd9bbfcf..00000000000 --- a/crates/dst/src/targets/harness.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::{ - config::RunConfig, - schema::SchemaPlan, - seed::DstSeed, - 
workload::table_ops::{run_generated_with_engine, TableScenarioId, TableWorkloadEngine, TableWorkloadOutcome}, -}; - -pub(crate) trait TableTargetHarness { - type Engine: TableWorkloadEngine; - - fn build_engine(schema: &SchemaPlan, num_connections: usize) -> anyhow::Result; -} - -pub(crate) fn run_generated_with_config_and_scenario( - seed: DstSeed, - scenario: TableScenarioId, - config: RunConfig, -) -> anyhow::Result { - run_generated_with_engine(seed, scenario, config, T::build_engine) -} diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index 81143533e36..b28169c826e 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -1,8 +1,6 @@ //! Concrete simulation targets. -pub mod datastore; pub mod descriptor; -pub(crate) mod harness; pub(crate) mod properties; pub mod relational_db_commitlog; pub mod standalone_host; diff --git a/crates/dst/src/targets/properties.rs b/crates/dst/src/targets/properties.rs index 25ceb4ba51c..6ffb62b8af2 100644 --- a/crates/dst/src/targets/properties.rs +++ b/crates/dst/src/targets/properties.rs @@ -1,4 +1,4 @@ -//! Target-level property runtime shared by datastore-oriented targets. +//! Target-level property runtime shared by table-oriented targets. //! //! Properties are defined once here and plugged into any target that //! implements [`TargetPropertyAccess`]. @@ -7,7 +7,10 @@ use std::ops::Bound; use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; -use crate::schema::{SchemaPlan, SimRow}; +use crate::{ + schema::{SchemaPlan, SimRow}, + workload::table_ops::{ExpectedModel, TableScenario, TableWorkloadInteraction, TableWorkloadOutcome}, +}; /// Target adapter for property evaluation. 
pub(crate) trait TargetPropertyAccess { @@ -61,6 +64,32 @@ impl PropertyRuntime { Self { rules } } + pub fn for_table_workload(scenario: S, schema: SchemaPlan, num_connections: usize) -> Self + where + S: TableScenario + 'static, + { + let mut runtime = Self::default(); + runtime + .rules + .push(RuleEntry::non_periodic(Box::new(ExpectedTableStateRule::new( + scenario, + schema, + num_connections, + )))); + runtime + } + + pub fn on_table_interaction( + &mut self, + access: &dyn TargetPropertyAccess, + interaction: &TableWorkloadInteraction, + ) -> Result<(), String> { + for entry in &mut self.rules { + entry.rule.on_table_interaction(access, interaction)?; + } + Ok(()) + } + pub fn on_insert( &mut self, access: &dyn TargetPropertyAccess, @@ -115,25 +144,46 @@ impl PropertyRuntime { } Ok(()) } + + pub fn on_table_workload_finish( + &mut self, + access: &dyn TargetPropertyAccess, + outcome: &TableWorkloadOutcome, + ) -> Result<(), String> { + for entry in &mut self.rules { + entry.rule.on_table_workload_finish(access, outcome)?; + } + Ok(()) + } } struct RuleEntry { - kind: PropertyKind, + periodic_every: Option, rule: Box, } impl RuleEntry { fn new(kind: PropertyKind, rule: Box) -> Self { - Self { kind, rule } + Self { + periodic_every: match kind { + PropertyKind::SelectSelectOptimizer | PropertyKind::WhereTrueFalseNull => Some(16), + PropertyKind::IndexRangeExcluded => Some(64), + _ => None, + }, + rule, + } } - fn periodic_every(&self) -> Option { - match self.kind { - PropertyKind::SelectSelectOptimizer | PropertyKind::WhereTrueFalseNull => Some(16), - PropertyKind::IndexRangeExcluded => Some(64), - _ => None, + fn non_periodic(rule: Box) -> Self { + Self { + periodic_every: None, + rule, } } + + fn periodic_every(&self) -> Option { + self.periodic_every + } } impl Default for PropertyRuntime { @@ -150,6 +200,14 @@ impl Default for PropertyRuntime { } trait PropertyRule { + fn on_table_interaction( + &mut self, + _access: &dyn TargetPropertyAccess, + 
_interaction: &TableWorkloadInteraction, + ) -> Result<(), String> { + Ok(()) + } + fn on_insert( &mut self, _access: &dyn TargetPropertyAccess, @@ -181,6 +239,59 @@ trait PropertyRule { fn on_commit_or_rollback(&mut self, _access: &dyn TargetPropertyAccess) -> Result<(), String> { Ok(()) } + + fn on_table_workload_finish( + &mut self, + _access: &dyn TargetPropertyAccess, + _outcome: &TableWorkloadOutcome, + ) -> Result<(), String> { + Ok(()) + } +} + +struct ExpectedTableStateRule { + scenario: S, + schema: SchemaPlan, + expected: ExpectedModel, +} + +impl ExpectedTableStateRule { + fn new(scenario: S, schema: SchemaPlan, num_connections: usize) -> Self { + let table_count = schema.tables.len(); + Self { + scenario, + schema, + expected: ExpectedModel::new(table_count, num_connections), + } + } +} + +impl PropertyRule for ExpectedTableStateRule { + fn on_table_interaction( + &mut self, + _access: &dyn TargetPropertyAccess, + interaction: &TableWorkloadInteraction, + ) -> Result<(), String> { + self.expected.apply(interaction); + Ok(()) + } + + fn on_table_workload_finish( + &mut self, + _access: &dyn TargetPropertyAccess, + outcome: &TableWorkloadOutcome, + ) -> Result<(), String> { + let expected_rows = self.expected.clone().committed_rows(); + if outcome.final_rows != expected_rows { + return Err(format!( + "[ExpectedTableState] final table state mismatch: expected={expected_rows:?} actual={:?}", + outcome.final_rows + )); + } + self.scenario + .validate_outcome(&self.schema, outcome) + .map_err(|err| format!("[ExpectedTableState] scenario invariant failed: {err}")) + } } #[derive(Default)] diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 3c2461171b1..552961a66bf 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -4,19 +4,18 @@ use std::{ collections::BTreeMap, ops::Bound, sync::Arc, - thread::sleep, - time::{Duration, 
Instant, SystemTime, UNIX_EPOCH}, + time::{SystemTime, UNIX_EPOCH}, }; use spacetimedb_core::{ - db::relational_db::{MutTx as RelMutTx, Persistence, RelationalDB, Txdata}, + db::relational_db::{MutTx as RelMutTx, Persistence, RelationalDB}, messages::control_db::HostType, }; use spacetimedb_datastore::{ execution_context::Workload, traits::{IsolationLevel, Program}, }; -use spacetimedb_durability::{EmptyHistory, History}; +use spacetimedb_durability::EmptyHistory; use spacetimedb_lib::{ db::auth::{StAccess, StTableType}, Identity, @@ -34,13 +33,15 @@ use tracing::{debug, info, trace}; use crate::{ config::RunConfig, - core::NextInteractionSource, + core::{self, TargetEngine}, schema::{SchemaPlan, SimRow}, seed::DstSeed, targets::properties::{PropertyRuntime, TargetPropertyAccess}, workload::{ commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome}, - table_ops::{ConnectionWriteState, TableScenario, TableScenarioId, TableWorkloadInteraction}, + table_ops::{ + ConnectionWriteState, TableScenario, TableScenarioId, TableWorkloadInteraction, TableWorkloadOutcome, + }, }, }; @@ -55,33 +56,15 @@ pub fn run_generated_with_config_and_scenario( let num_connections = connection_rng.index(3) + 1; let mut schema_rng = seed.fork(122).rng(); let schema = scenario.generate_schema(&mut schema_rng); - let mut generator = crate::workload::commitlog_ops::NextInteractionGeneratorComposite::new( + let generator = crate::workload::commitlog_ops::NextInteractionGeneratorComposite::new( seed, - scenario, + scenario.clone(), schema.clone(), num_connections, config.max_interactions_or_default(usize::MAX), ); - let mut engine = RelationalDbEngine::new(seed, &schema, num_connections)?; - let deadline = config.deadline(); - let mut step_index = 0usize; - - loop { - if deadline.is_some_and(|deadline| Instant::now() >= deadline) { - generator.request_finish(); - } - let Some(interaction) = generator.next_interaction() else { - break; - }; - trace!(step_index, ?interaction, "streaming 
interaction"); - engine - .execute(&interaction) - .map_err(|reason| anyhow::anyhow!("workload failed at step {step_index}: {reason}"))?; - step_index = step_index.saturating_add(1); - } - - let outcome = engine.collect_outcome().map_err(anyhow::Error::msg)?; - engine.finish(); + let engine = RelationalDbEngine::new(seed, scenario, &schema, num_connections)?; + let outcome = core::run_streaming(generator, engine, config)?; info!( applied_steps = outcome.applied_steps, durable_commit_count = outcome.durable_commit_count, @@ -118,7 +101,12 @@ struct RelationalDbEngine { type DurableSnapshot = BTreeMap>; impl RelationalDbEngine { - fn new(seed: DstSeed, schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { + fn new( + seed: DstSeed, + scenario: TableScenarioId, + schema: &SchemaPlan, + num_connections: usize, + ) -> anyhow::Result { let (db, runtime_handle, replica_dir, runtime_guard) = bootstrap_relational_db(seed.fork(700))?; let mut this = Self { db: Some(db), @@ -130,7 +118,7 @@ impl RelationalDbEngine { last_observed_durable_offset: None, last_durable_snapshot: BTreeMap::new(), pending_snapshot_capture: false, - properties: PropertyRuntime::default(), + properties: PropertyRuntime::for_table_workload(scenario, schema.clone(), num_connections), runtime_handle, replica_dir, _runtime_guard: runtime_guard, @@ -306,7 +294,7 @@ impl RelationalDbEngine { fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), String> { trace!(step = self.step, ?interaction, "table interaction"); - match interaction { + let applied: Result<(), String> = match interaction { TableWorkloadInteraction::BeginTx { conn } => { self.execution.ensure_known_connection(*conn)?; if self.execution.tx_by_connection[*conn].is_some() { @@ -368,7 +356,8 @@ impl RelationalDbEngine { let step = self.step as u64; self.with_property_runtime(|runtime, access| { runtime.on_insert(access, step, *conn, *table, &inserted_row, in_tx) - }) + })?; + Ok(()) } 
TableWorkloadInteraction::Delete { conn, table, row } => { let in_tx = self.execution.tx_by_connection[*conn].is_some(); @@ -387,9 +376,14 @@ impl RelationalDbEngine { self.sync_and_snapshot(false)?; } let step = self.step as u64; - self.with_property_runtime(|runtime, access| runtime.on_delete(access, step, *conn, *table, row, in_tx)) + self.with_property_runtime(|runtime, access| { + runtime.on_delete(access, step, *conn, *table, row, in_tx) + })?; + Ok(()) } - } + }; + applied?; + self.with_property_runtime(|runtime, access| runtime.on_table_interaction(access, interaction)) } fn with_mut_tx( @@ -565,7 +559,7 @@ impl RelationalDbEngine { self.execution.active_writer.unwrap_or(conn) } - fn sync_and_snapshot(&mut self, forced: bool) -> Result<(), String> { + fn sync_and_snapshot(&mut self, _forced: bool) -> Result<(), String> { Ok(()) } @@ -699,6 +693,8 @@ impl RelationalDbEngine { fn collect_outcome(&mut self) -> Result { self.capture_pending_snapshot_if_idle()?; self.sync_and_snapshot(true)?; + let table = self.collect_table_outcome()?; + self.with_property_runtime(|runtime, access| runtime.on_table_workload_finish(access, &table))?; let durable_commit_count = self .last_observed_durable_offset .map(|offset| (offset as usize).saturating_add(1)) @@ -708,6 +704,23 @@ impl RelationalDbEngine { applied_steps: self.step, durable_commit_count, replay_table_count: self.last_durable_snapshot.len(), + table, + }) + } + + fn collect_table_outcome(&self) -> Result { + let mut final_rows = Vec::with_capacity(self.base_table_ids.len()); + let mut final_row_counts = Vec::with_capacity(self.base_table_ids.len()); + + for &table_id in &self.base_table_ids { + let rows = self.collect_rows_by_id(table_id)?; + final_row_counts.push(rows.len() as u64); + final_rows.push(rows); + } + + Ok(TableWorkloadOutcome { + final_row_counts, + final_rows, }) } @@ -762,48 +775,21 @@ impl TargetPropertyAccess for RelationalDbEngine { } } -fn reopen_from_history(history: impl History) -> Result { 
- debug!("reopen relational db from mocked durable history"); - let (db, connected_clients) = RelationalDB::open( - Identity::ZERO, - Identity::ZERO, - history, - None, - None, - PagePool::new_for_test(), - ) - .map_err(|err| format!("reopen from history failed: {err}"))?; - if !connected_clients.is_empty() { - return Err(format!( - "unexpected connected clients after replay: {connected_clients:?}" - )); +impl TargetEngine for RelationalDbEngine { + type Outcome = RelationalDbCommitlogOutcome; + type Error = String; + + fn execute_interaction(&mut self, interaction: &CommitlogInteraction) -> Result<(), Self::Error> { + self.execute(interaction) } - let tx = db.begin_tx(Workload::ForTests); - let schemas = db - .get_all_tables(&tx) - .map_err(|err| format!("list tables after replay failed: {err}"))?; - let mut snapshot = BTreeMap::>::new(); - for schema in schemas { - let name = schema.table_name.to_string(); - if !is_user_dst_table(&name) { - continue; - } - let mut rows = db - .iter(&tx, schema.table_id) - .map_err(|err| format!("scan replay table '{name}' failed: {err}"))? 
- .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); - snapshot.insert(name, rows); + fn finish(&mut self) { + Self::finish(self); } - let _ = db.release_tx(tx); - debug!(tables = snapshot.len(), "reopen snapshot collected"); - Ok(snapshot) -} -fn is_user_dst_table(name: &str) -> bool { - !name.starts_with("st_") + fn collect_outcome(&mut self) -> anyhow::Result { + RelationalDbEngine::collect_outcome(self).map_err(anyhow::Error::msg) + } } fn bootstrap_relational_db( diff --git a/crates/dst/src/targets/standalone_host.rs b/crates/dst/src/targets/standalone_host.rs index 77ddbc19f9f..a7e06e67bfb 100644 --- a/crates/dst/src/targets/standalone_host.rs +++ b/crates/dst/src/targets/standalone_host.rs @@ -32,49 +32,27 @@ use crate::{ config::RunConfig, core::NextInteractionSource, seed::DstSeed, - workload::module_ops::{HostScenarioId, ModuleInteraction, ModuleReducerSpec, ModuleWorkloadOutcome, NextInteractionGenerator}, + workload::module_ops::{ + HostScenarioId, ModuleInteraction, ModuleReducerSpec, ModuleWorkloadOutcome, NextInteractionGenerator, + }, }; pub type StandaloneHostOutcome = ModuleWorkloadOutcome; -pub fn run_generated_with_config_and_scenario( - seed: DstSeed, - scenario: HostScenarioId, - config: RunConfig, -) -> anyhow::Result { - run_with_madsim_determinism(seed, scenario, config) -} - -fn run_with_madsim_determinism( - seed: DstSeed, - scenario: HostScenarioId, - config: RunConfig, -) -> anyhow::Result { - // Compile and cache module bytes before entering the deterministic replay. - // Module compilation may use host system threads, so do it outside the run. 
+pub fn prepare_generated_run() -> anyhow::Result<()> { let _ = compiled_module()?; - let (first_outcome, first_trace) = run_once_in_madsim_runtime(seed, scenario, config.clone())?; - let (second_outcome, second_trace) = run_once_in_madsim_runtime(seed, scenario, config)?; - if first_trace != second_trace { - anyhow::bail!("madsim deterministic replay mismatch: interaction trace differs"); - } - if first_outcome != second_outcome { - anyhow::bail!("madsim deterministic replay mismatch: outcome differs"); - } - Ok(first_outcome) + Ok(()) } -fn run_once_in_madsim_runtime( +pub async fn run_generated_with_config_and_scenario( seed: DstSeed, scenario: HostScenarioId, config: RunConfig, -) -> anyhow::Result<(StandaloneHostOutcome, Vec)> { - let mut runtime = madsim::runtime::Runtime::with_seed_and_config(seed.0, madsim::Config::default()); - runtime.set_allow_system_thread(true); - runtime.block_on(run_once_async(seed, scenario, config)) +) -> anyhow::Result { + let (outcome, _) = run_once_async(seed, scenario, config).await?; + Ok(outcome) } - async fn run_once_async( seed: DstSeed, scenario: HostScenarioId, @@ -108,12 +86,8 @@ async fn run_once_async( } // Replay contract: same seed/scenario/config must produce same interaction sequence. 
- let mut replay = NextInteractionGenerator::new( - seed, - scenario, - reducers, - config.max_interactions_or_default(usize::MAX), - ); + let mut replay = + NextInteractionGenerator::new(seed, scenario, reducers, config.max_interactions_or_default(usize::MAX)); let replayed = (0..trace_log.len()) .filter_map(|_| replay.next_interaction()) .collect::>(); @@ -148,9 +122,11 @@ fn compiled_module() -> anyhow::Result> { } async fn extract_reducer_specs(module: Arc) -> anyhow::Result> { - let module_def = - spacetimedb_core::host::extract_schema(module.program_bytes.clone().to_vec().into_boxed_slice(), module.host_type) - .await?; + let module_def = spacetimedb_core::host::extract_schema( + module.program_bytes.clone().to_vec().into_boxed_slice(), + module.host_type, + ) + .await?; Ok(module_def .reducers() .filter(|reducer| reducer.visibility == FunctionVisibility::ClientCallable) @@ -193,7 +169,9 @@ impl StandaloneHostEngine { SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos() ))); let _ = std::fs::remove_dir_all(&root_dir); - let session = open_session(&root_dir, &module, None).await.map_err(anyhow::Error::msg)?; + let session = open_session(&root_dir, &module, None) + .await + .map_err(anyhow::Error::msg)?; Ok(Self { root_dir, session: Some(session), @@ -307,11 +285,13 @@ async fn open_session( let caller_identity = Identity::ZERO; let db_identity = match maybe_db_identity { Some(identity) => identity, - None => SpacetimeAuth::alloc(&env) - .await - .map_err(|e| format!("db identity allocation failed: {e:#?}"))? - .claims - .identity, + None => { + SpacetimeAuth::alloc(&env) + .await + .map_err(|e| format!("db identity allocation failed: {e:#?}"))? + .claims + .identity + } }; if env diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs index 6f3378f67fb..5858ee7feca 100644 --- a/crates/dst/src/workload/commitlog_ops/types.rs +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -1,6 +1,6 @@ //! 
Serializable interaction model for relational-db + commitlog DST. -use crate::workload::table_ops::TableWorkloadInteraction; +use crate::workload::table_ops::{TableWorkloadInteraction, TableWorkloadOutcome}; /// One interaction in the commitlog-oriented mixed workload. #[derive(Clone, Debug, Eq, PartialEq)] @@ -25,4 +25,5 @@ pub struct CommitlogWorkloadOutcome { pub applied_steps: usize, pub durable_commit_count: usize, pub replay_table_count: usize, + pub table: TableWorkloadOutcome, } diff --git a/crates/dst/src/workload/module_ops/generation.rs b/crates/dst/src/workload/module_ops/generation.rs index 22898c8be84..e3049045ac5 100644 --- a/crates/dst/src/workload/module_ops/generation.rs +++ b/crates/dst/src/workload/module_ops/generation.rs @@ -26,7 +26,12 @@ pub(crate) struct NextInteractionGenerator { } impl NextInteractionGenerator { - pub fn new(seed: DstSeed, scenario: HostScenarioId, reducers: Vec, target_interactions: usize) -> Self { + pub fn new( + seed: DstSeed, + scenario: HostScenarioId, + reducers: Vec, + target_interactions: usize, + ) -> Self { Self { scenario, reducers, @@ -42,10 +47,12 @@ impl NextInteractionGenerator { fn choose_action(&mut self) -> ActionKind { match self.scenario { - HostScenarioId::HostSmoke => { - Weighted::new(vec![(85, ActionKind::Reducer), (10, ActionKind::Wait), (5, ActionKind::Reopen)]) - .sample(&mut self.rng) - } + HostScenarioId::HostSmoke => Weighted::new(vec![ + (85, ActionKind::Reducer), + (10, ActionKind::Wait), + (5, ActionKind::Reopen), + ]) + .sample(&mut self.rng), } } diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs index 7af6500db7a..69f349ffc4a 100644 --- a/crates/dst/src/workload/table_ops/mod.rs +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -1,14 +1,13 @@ -//! Shared transactional table workload used by datastore-like targets. +//! Shared transactional table workload used by table-oriented targets. 
mod generation; mod model; -mod runner; mod scenarios; pub(crate) mod strategies; mod types; pub(crate) use generation::NextInteractionGenerator; -pub(crate) use runner::run_generated_with_engine; +pub(crate) use model::ExpectedModel; pub use scenarios::TableScenarioId; -pub(crate) use types::{ConnectionWriteState, TableScenario, TableWorkloadEngine}; +pub(crate) use types::{ConnectionWriteState, TableScenario}; pub use types::{TableWorkloadInteraction, TableWorkloadOutcome}; diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs index 206f15722ac..4a6b5d3b3c4 100644 --- a/crates/dst/src/workload/table_ops/model.rs +++ b/crates/dst/src/workload/table_ops/model.rs @@ -127,8 +127,8 @@ impl GenerationModel { /// Replay model for the expected final committed state of a table workload. /// -/// The shared runner applies every interaction here in parallel with the real -/// target execution, then compares the collected target outcome against this +/// Target property runtimes apply every table interaction here in parallel with +/// real target execution, then compare the collected target outcome against this /// model at the end of the run. 
#[derive(Clone, Debug)] pub struct ExpectedModel { diff --git a/crates/dst/src/workload/table_ops/runner.rs b/crates/dst/src/workload/table_ops/runner.rs deleted file mode 100644 index c77c90221fc..00000000000 --- a/crates/dst/src/workload/table_ops/runner.rs +++ /dev/null @@ -1,75 +0,0 @@ -use crate::{ - config::RunConfig, - core::{self, PropertySet}, - schema::SchemaPlan, - seed::DstSeed, -}; - -use super::{ - model::ExpectedModel, NextInteractionGenerator, TableScenario, TableWorkloadEngine, TableWorkloadInteraction, - TableWorkloadOutcome, -}; - -struct TablePropertyRuntime { - scenario: S, - schema: SchemaPlan, - expected: ExpectedModel, -} - -impl TablePropertyRuntime { - fn new(scenario: S, schema: SchemaPlan, num_connections: usize) -> Self { - let table_count = schema.tables.len(); - Self { - scenario, - schema, - expected: ExpectedModel::new(table_count, num_connections), - } - } -} - -impl PropertySet for TablePropertyRuntime { - type Error = String; - - fn on_interaction(&mut self, interaction: &TableWorkloadInteraction, _step: usize) -> Result<(), Self::Error> { - self.expected.apply(interaction); - Ok(()) - } - - fn on_finish(&mut self, outcome: &TableWorkloadOutcome) -> Result<(), Self::Error> { - let expected_rows = self.expected.clone().committed_rows(); - if outcome.final_rows != expected_rows { - return Err(format!( - "final datastore state mismatch: expected={expected_rows:?} actual={:?}", - outcome.final_rows - )); - } - self.scenario - .validate_outcome(&self.schema, outcome) - .map_err(|err| format!("scenario invariant failed: {err}")) - } -} - -pub fn run_generated_with_engine( - seed: DstSeed, - scenario: S, - config: RunConfig, - make_engine: impl FnOnce(&SchemaPlan, usize) -> anyhow::Result, -) -> anyhow::Result -where - S: TableScenario, - E: TableWorkloadEngine, -{ - let mut rng = seed.fork(17).rng(); - let num_connections = rng.index(3) + 1; - let schema = scenario.generate_schema(&mut rng); - let generator = 
NextInteractionGenerator::new( - seed, - scenario.clone(), - schema.clone(), - num_connections, - config.max_interactions_or_default(usize::MAX), - ); - let engine = make_engine(&schema, num_connections)?; - let properties = TablePropertyRuntime::new(scenario, schema, num_connections); - core::run_streaming(generator, engine, properties, config) -} diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index d27d17bca5c..e036da63819 100644 --- a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -1,5 +1,4 @@ use crate::{ - core::TargetEngine, schema::{SchemaPlan, SimRow}, seed::DstRng, }; @@ -35,33 +34,6 @@ pub struct TableWorkloadOutcome { pub final_rows: Vec>, } -/// Minimal engine interface implemented by concrete table-oriented targets. -pub(crate) trait TableWorkloadEngine { - fn execute(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), String>; - fn collect_outcome(&mut self) -> anyhow::Result; - fn finish(&mut self); -} - -impl TargetEngine for T -where - T: TableWorkloadEngine, -{ - type Outcome = TableWorkloadOutcome; - type Error = String; - - fn execute_interaction(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), Self::Error> { - self.execute(interaction) - } - - fn finish(&mut self) { - TableWorkloadEngine::finish(self); - } - - fn collect_outcome(&mut self) -> anyhow::Result { - TableWorkloadEngine::collect_outcome(self) - } -} - /// Per-connection write transaction bookkeeping shared by locking targets. pub(crate) struct ConnectionWriteState { /// Open mutable transaction handle for each simulated connection. 
diff --git a/crates/dst/tests/madsim_axum_reqwest.rs b/crates/dst/tests/madsim_axum_reqwest.rs new file mode 100644 index 00000000000..ce97606bd60 --- /dev/null +++ b/crates/dst/tests/madsim_axum_reqwest.rs @@ -0,0 +1,36 @@ +use std::{net::SocketAddr, time::Duration}; + +use axum::{routing::get, Router}; + +#[test] +fn axum_server_reqwest_client_over_madsim_tcp() { + let runtime = madsim::runtime::Runtime::with_seed_and_config(1, madsim::Config::default()); + let server_addr: SocketAddr = "10.0.0.1:3000".parse().unwrap(); + let client_addr: SocketAddr = "10.0.0.2:0".parse().unwrap(); + + let server = runtime.create_node().ip(server_addr.ip()).build(); + let client = runtime.create_node().ip(client_addr.ip()).build(); + let ready = std::sync::Arc::new(tokio::sync::Barrier::new(2)); + + let server_ready = ready.clone(); + server.spawn(async move { + let app = Router::new().route("/ping", get(|| async { "pong" })); + let listener = tokio::net::TcpListener::bind(server_addr).await.unwrap(); + server_ready.wait().await; + axum::serve(listener, app).await.unwrap(); + }); + + let client_task = client.spawn(async move { + ready.wait().await; + let url = format!("http://{server_addr}/ping"); + let body = reqwest::get(url).await.unwrap().text().await.unwrap(); + assert_eq!(body, "pong"); + }); + + runtime.block_on(async move { + tokio::time::timeout(Duration::from_secs(5), client_task) + .await + .unwrap() + .unwrap(); + }); +} From 12cf035f22cd4f17c7eb84620c63012c8846e242 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 29 Apr 2026 15:46:37 +0530 Subject: [PATCH 19/74] inmemory commitlog --- crates/dst/Cargo.toml | 2 +- .../src/targets/relational_db_commitlog.rs | 188 +++++++++++++----- 2 files changed, 143 insertions(+), 47 deletions(-) diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index e9cf4aab9c4..870781199f2 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -24,7 +24,7 @@ spacetimedb-client-api.workspace = true 
spacetimedb-client-api-messages.workspace = true spacetimedb-datastore = { workspace = true, features = ["test"] } spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.1.0" } -spacetimedb-commitlog.workspace = true +spacetimedb-commitlog = { workspace = true, features = ["test"] } spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.1.0" } spacetimedb-execution.workspace = true spacetimedb-lib.workspace = true diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 552961a66bf..a6238aa6f34 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -2,11 +2,21 @@ use std::{ collections::BTreeMap, + io, ops::Bound, - sync::Arc, - time::{SystemTime, UNIX_EPOCH}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Mutex, + }, }; +use spacetimedb_commitlog::{ + commitlog::Generic as GenericCommitlog, + error as commitlog_error, + payload::Txdata as CommitlogTxdata, + repo::{Memory as MemoryCommitlogRepo, SizeOnDisk}, + Decoder as CommitlogDecoder, Transaction as CommitlogTransaction, +}; use spacetimedb_core::{ db::relational_db::{MutTx as RelMutTx, Persistence, RelationalDB}, messages::control_db::HostType, @@ -15,20 +25,20 @@ use spacetimedb_datastore::{ execution_context::Workload, traits::{IsolationLevel, Program}, }; -use spacetimedb_durability::EmptyHistory; +use spacetimedb_durability::{Close, Durability, DurableOffset, EmptyHistory, History, PreparedTx, TxOffset}; use spacetimedb_lib::{ db::auth::{StAccess, StTableType}, Identity, }; -use spacetimedb_paths::{server::ReplicaDir, FromPathUnchecked}; use spacetimedb_primitives::{SequenceId, TableId}; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; +use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; use spacetimedb_schema::{ def::BTreeAlgorithm, schema::{ColumnSchema, ConstraintSchema, 
IndexSchema, SequenceSchema, TableSchema}, table_name::TableName, }; use spacetimedb_table::page_pool::PagePool; +use tokio::sync::watch; use tracing::{debug, info, trace}; use crate::{ @@ -93,8 +103,9 @@ struct RelationalDbEngine { last_durable_snapshot: DurableSnapshot, pending_snapshot_capture: bool, properties: PropertyRuntime, + durability: Arc, runtime_handle: tokio::runtime::Handle, - replica_dir: ReplicaDir, + commitlog_repo: MemoryCommitlogRepo, _runtime_guard: Option, } @@ -107,7 +118,7 @@ impl RelationalDbEngine { schema: &SchemaPlan, num_connections: usize, ) -> anyhow::Result { - let (db, runtime_handle, replica_dir, runtime_guard) = bootstrap_relational_db(seed.fork(700))?; + let (db, runtime_handle, commitlog_repo, durability, runtime_guard) = bootstrap_relational_db(seed.fork(700))?; let mut this = Self { db: Some(db), execution: ConnectionWriteState::new(num_connections), @@ -119,8 +130,9 @@ impl RelationalDbEngine { last_durable_snapshot: BTreeMap::new(), pending_snapshot_capture: false, properties: PropertyRuntime::for_table_workload(scenario, schema.clone(), num_connections), + durability, runtime_handle, - replica_dir, + commitlog_repo, _runtime_guard: runtime_guard, }; this.install_base_schema().map_err(anyhow::Error::msg)?; @@ -209,34 +221,20 @@ impl RelationalDbEngine { .ok_or_else(|| "close/reopen failed: relational db not initialized".to_string())?; self.runtime_handle.block_on(old_db.shutdown()); drop(old_db); - info!("starting durability"); - - // In madsim we avoid blocking close here; dropping the close future - // triggers actor abort via durability's close guard. 
- - let durability = Arc::new( - spacetimedb_durability::Local::open( - self.replica_dir.clone(), - self.runtime_handle.clone(), - Default::default(), - None, - ) - .map_err(|err| format!("reopen local durability failed: {err}"))?, - ); + info!("starting in-memory durability"); + let durability = InMemoryCommitlogDurability::open(self.commitlog_repo.clone()) + .map_err(|err| format!("reopen in-memory durability failed: {err}"))?; let persistence = Persistence { durability: durability.clone(), - disk_size: Arc::new({ - let durability = durability.clone(); - move || durability.size_on_disk() - }), + disk_size: Arc::new(in_memory_size_on_disk), snapshots: None, runtime: self.runtime_handle.clone(), }; let (db, connected_clients) = RelationalDB::open( Identity::ZERO, Identity::ZERO, - durability.as_history(), + durability.clone(), Some(persistence), None, PagePool::new_for_test(), @@ -247,6 +245,7 @@ impl RelationalDbEngine { "unexpected connected clients after reopen: {connected_clients:?}" )); } + self.durability = durability; self.db = Some(db); self.rebuild_table_handles_after_reopen()?; self.capture_pending_snapshot_if_idle()?; @@ -559,7 +558,13 @@ impl RelationalDbEngine { self.execution.active_writer.unwrap_or(conn) } - fn sync_and_snapshot(&mut self, _forced: bool) -> Result<(), String> { + fn sync_and_snapshot(&mut self, forced: bool) -> Result<(), String> { + let durable_offset = self.durability.durable_tx_offset().last_seen(); + if forced || durable_offset != self.last_observed_durable_offset { + self.last_observed_durable_offset = durable_offset; + self.pending_snapshot_capture = true; + self.capture_pending_snapshot_if_idle()?; + } Ok(()) } @@ -792,12 +797,112 @@ impl TargetEngine for RelationalDbEngine { } } +type RelationalTxData = CommitlogTxdata; + +struct InMemoryCommitlogDurability { + log: Mutex>, + durable_tx: watch::Sender>, + durable_rx: watch::Receiver>, + closed: AtomicBool, +} + +impl InMemoryCommitlogDurability { + fn open(repo: 
MemoryCommitlogRepo) -> io::Result> { + let log = GenericCommitlog::open(repo, Default::default())?; + let durable_offset = log.max_committed_offset(); + let (durable_tx, durable_rx) = watch::channel(durable_offset); + Ok(Arc::new(Self { + log: Mutex::new(log), + durable_tx, + durable_rx, + closed: AtomicBool::new(false), + })) + } + + fn flush_and_sync(&self) -> Option { + let mut log = self.log.lock().expect("in-memory commitlog poisoned"); + log.flush().expect("in-memory commitlog flush failed"); + log.sync(); + let durable_offset = log.max_committed_offset(); + let _ = self.durable_tx.send(durable_offset); + durable_offset + } +} + +impl Durability for InMemoryCommitlogDurability { + type TxData = RelationalTxData; + + fn append_tx(&self, tx: PreparedTx) { + if self.closed.load(Ordering::SeqCst) { + panic!("in-memory durability is closed"); + } + let mut log = self.log.lock().expect("in-memory commitlog poisoned"); + log.commit([tx.into_transaction()]) + .expect("in-memory commitlog commit failed"); + log.flush().expect("in-memory commitlog flush failed"); + log.sync(); + let durable_offset = log.max_committed_offset(); + let _ = self.durable_tx.send(durable_offset); + } + + fn durable_tx_offset(&self) -> DurableOffset { + self.durable_rx.clone().into() + } + + fn close(&self) -> Close { + self.closed.store(true, Ordering::SeqCst); + let durable_offset = self.flush_and_sync(); + Box::pin(async move { durable_offset }) + } +} + +impl History for InMemoryCommitlogDurability { + type TxData = RelationalTxData; + + fn fold_transactions_from(&self, offset: TxOffset, decoder: D) -> Result<(), D::Error> + where + D: CommitlogDecoder, + D::Error: From, + { + self.log + .lock() + .expect("in-memory commitlog poisoned") + .fold_transactions_from(offset, decoder) + } + + fn transactions_from<'a, D>( + &self, + offset: TxOffset, + decoder: &'a D, + ) -> impl Iterator, D::Error>> + where + D: CommitlogDecoder, + D::Error: From, + Self::TxData: 'a, + { + self.log + .lock() + 
.expect("in-memory commitlog poisoned") + .transactions_from(offset, decoder) + .collect::>() + .into_iter() + } + + fn tx_range_hint(&self) -> (TxOffset, Option) { + let log = self.log.lock().expect("in-memory commitlog poisoned"); + let min = log.min_committed_offset().unwrap_or_default(); + let max = log.max_committed_offset(); + (min, max) + } +} + fn bootstrap_relational_db( - seed: DstSeed, + _seed: DstSeed, ) -> anyhow::Result<( RelationalDB, tokio::runtime::Handle, - ReplicaDir, + MemoryCommitlogRepo, + Arc, Option, )> { let (runtime_handle, runtime_guard) = if let Ok(handle) = tokio::runtime::Handle::try_current() { @@ -806,14 +911,12 @@ fn bootstrap_relational_db( let runtime = tokio::runtime::Runtime::new()?; (runtime.handle().clone(), Some(runtime)) }; - let replica_dir = dst_replica_dir(seed)?; - let durability = Arc::new( - spacetimedb_durability::Local::open(replica_dir.clone(), runtime_handle.clone(), Default::default(), None) - .map_err(|err| anyhow::anyhow!("open local durability failed: {err}"))?, - ); + let commitlog_repo = MemoryCommitlogRepo::unlimited(); + let durability = InMemoryCommitlogDurability::open(commitlog_repo.clone()) + .map_err(|err| anyhow::anyhow!("open in-memory durability failed: {err}"))?; let persistence = Persistence { durability: durability.clone(), - disk_size: Arc::new(move || durability.size_on_disk()), + disk_size: Arc::new(in_memory_size_on_disk), snapshots: None, runtime: runtime_handle.clone(), }; @@ -829,18 +932,11 @@ fn bootstrap_relational_db( db.with_auto_commit(Workload::Internal, |tx| { db.set_initialized(tx, Program::empty(HostType::Wasm.into())) })?; - Ok((db, runtime_handle, replica_dir, runtime_guard)) + Ok((db, runtime_handle, commitlog_repo, durability, runtime_guard)) } -fn dst_replica_dir(seed: DstSeed) -> anyhow::Result { - let nonce = SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos(); - let path = std::env::temp_dir().join(format!( - "spacetimedb-dst-relational-db-commitlog-{}-{}-{nonce}", - 
seed.0, - std::process::id() - )); - std::fs::create_dir_all(&path)?; - Ok(ReplicaDir::from_path_unchecked(path)) +fn in_memory_size_on_disk() -> io::Result { + Ok(SizeOnDisk::default()) } fn dynamic_table_name(slot: u32) -> String { From 8b5f5bc4ba9e319ad562a4d6a574cbcb56b2f9ba Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 30 Apr 2026 01:54:23 +0530 Subject: [PATCH 20/74] larger surface --- crates/commitlog/src/lib.rs | 54 +- crates/commitlog/src/repo/mod.rs | 12 + crates/dst/src/core/mod.rs | 5 +- crates/dst/src/targets/descriptor.rs | 6 +- crates/dst/src/targets/properties.rs | 951 +++++++++++++----- .../src/targets/relational_db_commitlog.rs | 449 ++++++--- .../dst/src/workload/table_ops/generation.rs | 31 +- crates/dst/src/workload/table_ops/mod.rs | 2 +- crates/dst/src/workload/table_ops/model.rs | 163 ++- .../workload/table_ops/scenarios/banking.rs | 24 +- .../table_ops/scenarios/random_crud.rs | 127 ++- crates/dst/src/workload/table_ops/types.rs | 192 +++- crates/durability/src/imp/local.rs | 86 +- 13 files changed, 1584 insertions(+), 518 deletions(-) diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index 3922f002a84..26d37f97966 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -155,11 +155,14 @@ impl Options { /// /// Records in the log are of type `T`, which canonically is instantiated to /// [`payload::Txdata`]. -pub struct Commitlog { - inner: RwLock>, +pub struct Commitlog +where + R: Repo, +{ + inner: RwLock>, } -impl Commitlog { +impl Commitlog { /// Open the log at root directory `root` with [`Options`]. /// /// The root directory must already exist. @@ -178,7 +181,26 @@ impl Commitlog { root.display() ); } - let inner = commitlog::Generic::open(repo::Fs::new(root, on_new_segment)?, opts)?; + Self::open_with_repo(repo::Fs::new(root, on_new_segment)?, opts) + } + + /// Determine the size on disk of this commitlog. 
+ pub fn size_on_disk(&self) -> io::Result { + let inner = self.inner.read().unwrap(); + inner.repo.size_on_disk() + } +} + +impl Commitlog +where + R: Repo, +{ + /// Open the log in `repo` with [`Options`]. + /// + /// This is useful for tests and simulators which provide a repository + /// implementation other than [`repo::Fs`]. + pub fn open_with_repo(repo: R, opts: Options) -> io::Result { + let inner = commitlog::Generic::open(repo, opts)?; Ok(Self { inner: RwLock::new(inner), @@ -307,7 +329,7 @@ impl Commitlog { /// This means that, when this iterator yields an `Err` value, the consumer /// may want to check if the iterator is exhausted (by calling `next()`) /// before treating the `Err` value as an application error. - pub fn commits(&self) -> impl Iterator> + use { + pub fn commits(&self) -> impl Iterator> + use { self.commits_from(0) } @@ -320,7 +342,10 @@ impl Commitlog { /// Note that the first [`StoredCommit`] yielded is the first commit /// containing the given transaction offset, i.e. its `min_tx_offset` may be /// smaller than `offset`. - pub fn commits_from(&self, offset: u64) -> impl Iterator> + use { + pub fn commits_from( + &self, + offset: u64, + ) -> impl Iterator> + use { self.inner.read().unwrap().commits_from(offset) } @@ -374,15 +399,12 @@ impl Commitlog { inner: RwLock::new(inner), }) } - - /// Determine the size on disk of this commitlog. - pub fn size_on_disk(&self) -> io::Result { - let inner = self.inner.read().unwrap(); - inner.repo.size_on_disk() - } } -impl Commitlog { +impl Commitlog +where + R: Repo, +{ /// Write `transactions` to the log. 
/// /// This will store all `transactions` as a single [Commit] @@ -452,10 +474,11 @@ impl Commitlog { pub fn transactions<'a, D>( &self, de: &'a D, - ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T> + ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T, R> where D: Decoder, D::Error: From, + R: 'a, T: 'a, { self.transactions_from(0, de) @@ -471,10 +494,11 @@ impl Commitlog { &self, offset: u64, de: &'a D, - ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T> + ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T, R> where D: Decoder, D::Error: From, + R: 'a, T: 'a, { self.inner.read().unwrap().transactions_from(offset, de) diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 3d1968c00b2..5cc1451ef24 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -144,6 +144,18 @@ pub trait Repo: Clone + fmt::Display { } } +/// Marker for repos that do not require an external lock file. +/// +/// Durability implementations can use this to expose repo-backed opening +/// only for storage backends where skipping the filesystem `db.lock` cannot +/// violate single-writer safety. +pub trait RepoWithoutLockFile: Repo {} + +impl RepoWithoutLockFile for &T {} + +#[cfg(any(test, feature = "test"))] +impl RepoWithoutLockFile for Memory {} + impl Repo for &T { type SegmentWriter = T::SegmentWriter; type SegmentReader = T::SegmentReader; diff --git a/crates/dst/src/core/mod.rs b/crates/dst/src/core/mod.rs index e03b340add8..b7ad15769c5 100644 --- a/crates/dst/src/core/mod.rs +++ b/crates/dst/src/core/mod.rs @@ -25,13 +25,13 @@ pub trait TargetEngine { type Outcome; type Error; - fn execute_interaction(&mut self, interaction: &I) -> Result<(), Self::Error>; + async fn execute_interaction(&mut self, interaction: &I) -> Result<(), Self::Error>; fn finish(&mut self); fn collect_outcome(&mut self) -> anyhow::Result; } /// Shared streaming runner. 
-pub fn run_streaming(mut source: S, mut engine: E, cfg: RunConfig) -> anyhow::Result +pub async fn run_streaming(mut source: S, mut engine: E, cfg: RunConfig) -> anyhow::Result where I: Clone, S: NextInteractionSource, @@ -48,6 +48,7 @@ where }; engine .execute_interaction(&interaction) + .await .map_err(|e| anyhow::anyhow!("interaction execution failed at step {step}: {e}"))?; step = step.saturating_add(1); } diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index e32848204c2..e9514a70b4f 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -30,9 +30,9 @@ impl TargetDescriptor for RelationalDbCommitlogDescriptor { fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture { Box::pin(async move { - let outcome = crate::targets::relational_db_commitlog::run_generated_with_config_and_scenario( - seed, scenario, config, - )?; + let outcome = + crate::targets::relational_db_commitlog::run_generated_with_config_and_scenario(seed, scenario, config) + .await?; Ok(format!( "ok target={} seed={} steps={} durable_commits={} replay_tables={}", Self::NAME, diff --git a/crates/dst/src/targets/properties.rs b/crates/dst/src/targets/properties.rs index 6ffb62b8af2..1aa959640f0 100644 --- a/crates/dst/src/targets/properties.rs +++ b/crates/dst/src/targets/properties.rs @@ -9,7 +9,10 @@ use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; use crate::{ schema::{SchemaPlan, SimRow}, - workload::table_ops::{ExpectedModel, TableScenario, TableWorkloadInteraction, TableWorkloadOutcome}, + workload::table_ops::{ + ExpectedErrorKind, ExpectedModel, ExpectedResult, TableOperation, TableScenario, TableWorkloadInteraction, + TableWorkloadOutcome, + }, }; /// Target adapter for property evaluation. 
@@ -37,11 +40,141 @@ pub(crate) enum PropertyKind { WhereTrueFalseNull, IndexRangeExcluded, BankingTablesMatch, + DynamicMigrationAutoInc, + ExpectedErrorMatches, + PointLookupMatchesModel, + PredicateCountMatchesModel, + RangeScanMatchesModel, + FullScanMatchesModel, +} + +#[derive(Clone, Debug)] +pub(crate) struct DynamicMigrationProbe { + pub slot: u32, + pub from_version: u32, + pub to_version: u32, + pub existing_rows: Vec, + pub inserted_row: SimRow, +} + +#[derive(Clone, Debug)] +pub(crate) struct PropertyModels { + table: TableModel, +} + +#[derive(Clone, Debug)] +pub(crate) struct TableModel { + expected: ExpectedModel, +} + +pub(crate) struct PropertyContext<'a> { + pub access: &'a dyn TargetPropertyAccess, + pub models: &'a PropertyModels, +} + +#[derive(Clone, Debug)] +pub(crate) enum PropertyEvent<'a> { + TableInteractionApplied, + RowInserted { + conn: usize, + table: usize, + row: &'a SimRow, + in_tx: bool, + }, + RowDeleted { + conn: usize, + table: usize, + row: &'a SimRow, + in_tx: bool, + }, + ExpectedError { + kind: ExpectedErrorKind, + interaction: &'a TableWorkloadInteraction, + }, + PointLookup { + conn: usize, + table: usize, + id: u64, + actual: &'a Option, + }, + PredicateCount { + conn: usize, + table: usize, + col: u16, + value: &'a AlgebraicValue, + actual: usize, + }, + RangeScan { + conn: usize, + table: usize, + cols: &'a [u16], + lower: &'a Bound, + upper: &'a Bound, + actual: &'a [SimRow], + }, + FullScan { + conn: usize, + table: usize, + actual: &'a [SimRow], + }, + CommitOrRollback, + DynamicMigrationProbe(&'a DynamicMigrationProbe), + TableWorkloadFinished(&'a TableWorkloadOutcome), +} + +impl PropertyModels { + pub fn new(table_count: usize, num_connections: usize) -> Self { + Self { + table: TableModel { + expected: ExpectedModel::new(table_count, num_connections), + }, + } + } + + pub fn table(&self) -> &TableModel { + &self.table + } + + fn apply(&mut self, interaction: &TableWorkloadInteraction) { + 
self.table.expected.apply(interaction); + } +} + +impl TableModel { + pub fn committed_rows(&self) -> Vec> { + self.expected.clone().committed_rows() + } + + pub fn lookup_by_id(&self, conn: usize, table: usize, id: u64) -> Option { + self.expected.lookup_by_id(conn, table, id) + } + + pub fn predicate_count(&self, conn: usize, table: usize, col: u16, value: &AlgebraicValue) -> usize { + self.expected.predicate_count(conn, table, col, value) + } + + pub fn range_scan( + &self, + conn: usize, + table: usize, + cols: &[u16], + lower: &Bound, + upper: &Bound, + ) -> Vec { + self.expected.range_scan(conn, table, cols, lower, upper) + } + + pub fn full_scan(&self, conn: usize, table: usize) -> Vec { + let mut rows = self.expected.visible_rows(conn, table); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + rows + } } /// Mutable runtime holding selected property implementations. pub(crate) struct PropertyRuntime { rules: Vec, + models: PropertyModels, } impl PropertyRuntime { @@ -59,9 +192,30 @@ impl PropertyRuntime { PropertyKind::BankingTablesMatch => { rules.push(RuleEntry::new(*kind, Box::::default())) } + PropertyKind::DynamicMigrationAutoInc => { + rules.push(RuleEntry::new(*kind, Box::::default())) + } + PropertyKind::ExpectedErrorMatches => { + rules.push(RuleEntry::new(*kind, Box::::default())) + } + PropertyKind::PointLookupMatchesModel => { + rules.push(RuleEntry::new(*kind, Box::::default())) + } + PropertyKind::PredicateCountMatchesModel => { + rules.push(RuleEntry::new(*kind, Box::::default())) + } + PropertyKind::RangeScanMatchesModel => { + rules.push(RuleEntry::new(*kind, Box::::default())) + } + PropertyKind::FullScanMatchesModel => { + rules.push(RuleEntry::new(*kind, Box::::default())) + } } } - Self { rules } + Self { + rules, + models: PropertyModels::new(0, 0), + } } pub fn for_table_workload(scenario: S, schema: SchemaPlan, num_connections: usize) -> Self @@ -69,12 +223,11 @@ impl PropertyRuntime { S: TableScenario + 'static, { let mut 
runtime = Self::default(); + runtime.models = PropertyModels::new(schema.tables.len(), num_connections); runtime .rules .push(RuleEntry::non_periodic(Box::new(ExpectedTableStateRule::new( - scenario, - schema, - num_connections, + scenario, schema, )))); runtime } @@ -84,8 +237,28 @@ impl PropertyRuntime { access: &dyn TargetPropertyAccess, interaction: &TableWorkloadInteraction, ) -> Result<(), String> { + match &interaction.op { + TableOperation::BeginTx { .. } | TableOperation::CommitTx { .. } | TableOperation::RollbackTx { .. } => { + self.models.apply(interaction) + } + TableOperation::BatchInsert { .. } + | TableOperation::BatchDelete { .. } + | TableOperation::Reinsert { .. } => self.models.apply(interaction), + TableOperation::Insert { .. } + | TableOperation::Delete { .. } + | TableOperation::DuplicateInsert { .. } + | TableOperation::DeleteMissing { .. } + | TableOperation::PointLookup { .. } + | TableOperation::PredicateCount { .. } + | TableOperation::RangeScan { .. } + | TableOperation::FullScan { .. 
} => {} + } + let ctx = PropertyContext { + access, + models: &self.models, + }; for entry in &mut self.rules { - entry.rule.on_table_interaction(access, interaction)?; + entry.rule.observe(&ctx, PropertyEvent::TableInteractionApplied)?; } Ok(()) } @@ -93,23 +266,28 @@ impl PropertyRuntime { pub fn on_insert( &mut self, access: &dyn TargetPropertyAccess, - step: u64, + _step: u64, conn: usize, table: usize, row: &SimRow, in_tx: bool, ) -> Result<(), String> { + self.models + .apply(&TableWorkloadInteraction::insert(conn, table, row.clone())); + let ctx = PropertyContext { + access, + models: &self.models, + }; for entry in &mut self.rules { - entry.rule.on_insert(access, step, conn, table, row, in_tx)?; - } - if !in_tx { - for entry in &mut self.rules { - if let Some(every) = entry.periodic_every() - && step.is_multiple_of(every) - { - entry.rule.on_periodic(access, table)?; - } - } + entry.rule.observe( + &ctx, + PropertyEvent::RowInserted { + conn, + table, + row, + in_tx, + }, + )?; } Ok(()) } @@ -117,30 +295,181 @@ impl PropertyRuntime { pub fn on_delete( &mut self, access: &dyn TargetPropertyAccess, - step: u64, + _step: u64, conn: usize, table: usize, row: &SimRow, in_tx: bool, ) -> Result<(), String> { + self.models + .apply(&TableWorkloadInteraction::delete(conn, table, row.clone())); + let ctx = PropertyContext { + access, + models: &self.models, + }; for entry in &mut self.rules { - entry.rule.on_delete(access, step, conn, table, row, in_tx)?; + entry.rule.observe( + &ctx, + PropertyEvent::RowDeleted { + conn, + table, + row, + in_tx, + }, + )?; } - if !in_tx { - for entry in &mut self.rules { - if let Some(every) = entry.periodic_every() - && step.is_multiple_of(every) - { - entry.rule.on_periodic(access, table)?; - } - } + Ok(()) + } + + pub fn on_expected_error( + &mut self, + access: &dyn TargetPropertyAccess, + kind: ExpectedErrorKind, + interaction: &TableWorkloadInteraction, + ) -> Result<(), String> { + if interaction.expected != 
ExpectedResult::Err(kind) { + return Err(format!( + "[ExpectedErrorMatches] expected {:?}, observed {kind:?} for {interaction:?}", + interaction.expected + )); + } + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry + .rule + .observe(&ctx, PropertyEvent::ExpectedError { kind, interaction })?; + } + Ok(()) + } + + pub fn on_point_lookup( + &mut self, + access: &dyn TargetPropertyAccess, + conn: usize, + table: usize, + id: u64, + actual: &Option, + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe( + &ctx, + PropertyEvent::PointLookup { + conn, + table, + id, + actual, + }, + )?; + } + Ok(()) + } + + pub fn on_predicate_count( + &mut self, + access: &dyn TargetPropertyAccess, + conn: usize, + table: usize, + col: u16, + value: &AlgebraicValue, + actual: usize, + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe( + &ctx, + PropertyEvent::PredicateCount { + conn, + table, + col, + value, + actual, + }, + )?; + } + Ok(()) + } + + pub fn on_range_scan( + &mut self, + access: &dyn TargetPropertyAccess, + conn: usize, + table: usize, + cols: &[u16], + lower: &Bound, + upper: &Bound, + actual: &[SimRow], + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe( + &ctx, + PropertyEvent::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + }, + )?; + } + Ok(()) + } + + pub fn on_full_scan( + &mut self, + access: &dyn TargetPropertyAccess, + conn: usize, + table: usize, + actual: &[SimRow], + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry + .rule + .observe(&ctx, PropertyEvent::FullScan { conn, table, actual })?; } Ok(()) } pub fn 
on_commit_or_rollback(&mut self, access: &dyn TargetPropertyAccess) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe(&ctx, PropertyEvent::CommitOrRollback)?; + } + Ok(()) + } + + pub fn on_dynamic_migration_probe( + &mut self, + access: &dyn TargetPropertyAccess, + probe: &DynamicMigrationProbe, + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; for entry in &mut self.rules { - entry.rule.on_commit_or_rollback(access)?; + entry.rule.observe(&ctx, PropertyEvent::DynamicMigrationProbe(probe))?; } Ok(()) } @@ -150,39 +479,31 @@ impl PropertyRuntime { access: &dyn TargetPropertyAccess, outcome: &TableWorkloadOutcome, ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; for entry in &mut self.rules { - entry.rule.on_table_workload_finish(access, outcome)?; + entry + .rule + .observe(&ctx, PropertyEvent::TableWorkloadFinished(outcome))?; } Ok(()) } } struct RuleEntry { - periodic_every: Option, rule: Box, } impl RuleEntry { fn new(kind: PropertyKind, rule: Box) -> Self { - Self { - periodic_every: match kind { - PropertyKind::SelectSelectOptimizer | PropertyKind::WhereTrueFalseNull => Some(16), - PropertyKind::IndexRangeExcluded => Some(64), - _ => None, - }, - rule, - } + let _ = kind; + Self { rule } } fn non_periodic(rule: Box) -> Self { - Self { - periodic_every: None, - rule, - } - } - - fn periodic_every(&self) -> Option { - self.periodic_every + Self { rule } } } @@ -195,56 +516,20 @@ impl Default for PropertyRuntime { PropertyKind::WhereTrueFalseNull, PropertyKind::IndexRangeExcluded, PropertyKind::BankingTablesMatch, + PropertyKind::DynamicMigrationAutoInc, + PropertyKind::ExpectedErrorMatches, + PropertyKind::PointLookupMatchesModel, + PropertyKind::PredicateCountMatchesModel, + PropertyKind::RangeScanMatchesModel, + PropertyKind::FullScanMatchesModel, ]) } } trait PropertyRule 
{ - fn on_table_interaction( - &mut self, - _access: &dyn TargetPropertyAccess, - _interaction: &TableWorkloadInteraction, - ) -> Result<(), String> { - Ok(()) - } - - fn on_insert( - &mut self, - _access: &dyn TargetPropertyAccess, - _step: u64, - _conn: usize, - _table: usize, - _row: &SimRow, - _in_tx: bool, - ) -> Result<(), String> { - Ok(()) - } - - fn on_delete( - &mut self, - _access: &dyn TargetPropertyAccess, - _step: u64, - _conn: usize, - _table: usize, - _row: &SimRow, - _in_tx: bool, - ) -> Result<(), String> { - Ok(()) - } - - fn on_periodic(&mut self, _access: &dyn TargetPropertyAccess, _table: usize) -> Result<(), String> { - Ok(()) - } - - fn on_commit_or_rollback(&mut self, _access: &dyn TargetPropertyAccess) -> Result<(), String> { - Ok(()) - } - - fn on_table_workload_finish( - &mut self, - _access: &dyn TargetPropertyAccess, - _outcome: &TableWorkloadOutcome, - ) -> Result<(), String> { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let _ = ctx; + let _ = event; Ok(()) } } @@ -252,45 +537,31 @@ trait PropertyRule { struct ExpectedTableStateRule { scenario: S, schema: SchemaPlan, - expected: ExpectedModel, } impl ExpectedTableStateRule { - fn new(scenario: S, schema: SchemaPlan, num_connections: usize) -> Self { - let table_count = schema.tables.len(); - Self { - scenario, - schema, - expected: ExpectedModel::new(table_count, num_connections), - } + fn new(scenario: S, schema: SchemaPlan) -> Self { + Self { scenario, schema } } } impl PropertyRule for ExpectedTableStateRule { - fn on_table_interaction( - &mut self, - _access: &dyn TargetPropertyAccess, - interaction: &TableWorkloadInteraction, - ) -> Result<(), String> { - self.expected.apply(interaction); - Ok(()) - } - - fn on_table_workload_finish( - &mut self, - _access: &dyn TargetPropertyAccess, - outcome: &TableWorkloadOutcome, - ) -> Result<(), String> { - let expected_rows = self.expected.clone().committed_rows(); - if 
outcome.final_rows != expected_rows { - return Err(format!( - "[ExpectedTableState] final table state mismatch: expected={expected_rows:?} actual={:?}", - outcome.final_rows - )); + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + match event { + PropertyEvent::TableWorkloadFinished(outcome) => { + let expected_rows = ctx.models.table().committed_rows(); + if outcome.final_rows != expected_rows { + return Err(format!( + "[ExpectedTableState] final table state mismatch: expected={expected_rows:?} actual={:?}", + outcome.final_rows + )); + } + self.scenario + .validate_outcome(&self.schema, outcome) + .map_err(|err| format!("[ExpectedTableState] scenario invariant failed: {err}")) + } + _ => Ok(()), } - self.scenario - .validate_outcome(&self.schema, outcome) - .map_err(|err| format!("[ExpectedTableState] scenario invariant failed: {err}")) } } @@ -298,17 +569,12 @@ impl PropertyRule for ExpectedTableStateRule { struct InsertSelectRule; impl PropertyRule for InsertSelectRule { - fn on_insert( - &mut self, - access: &dyn TargetPropertyAccess, - _step: u64, - conn: usize, - table: usize, - row: &SimRow, - _in_tx: bool, - ) -> Result<(), String> { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RowInserted { conn, table, row, .. 
} = event else { + return Ok(()); + }; let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = access.lookup_in_connection(conn, table, id)?; + let found = ctx.access.lookup_in_connection(conn, table, id)?; if found != Some(row.clone()) { return Err(format!( "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={row:?}, actual={found:?}" @@ -322,17 +588,12 @@ impl PropertyRule for InsertSelectRule { struct DeleteSelectRule; impl PropertyRule for DeleteSelectRule { - fn on_delete( - &mut self, - access: &dyn TargetPropertyAccess, - _step: u64, - conn: usize, - table: usize, - row: &SimRow, - _in_tx: bool, - ) -> Result<(), String> { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RowDeleted { conn, table, row, .. } = event else { + return Ok(()); + }; let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - if access.lookup_in_connection(conn, table, id)?.is_some() { + if ctx.access.lookup_in_connection(conn, table, id)?.is_some() { return Err(format!( "[DeleteSelect] row still visible after delete on conn={conn}, table={table}, row={row:?}" )); @@ -341,46 +602,63 @@ impl PropertyRule for DeleteSelectRule { } } +fn post_write_check_tables(ctx: &PropertyContext<'_>, event: &PropertyEvent<'_>) -> Option> { + match event { + PropertyEvent::RowInserted { + table, in_tx: false, .. + } + | PropertyEvent::RowDeleted { + table, in_tx: false, .. 
+ } => Some(vec![*table]), + PropertyEvent::CommitOrRollback => Some((0..ctx.access.schema_plan().tables.len()).collect()), + _ => None, + } +} + #[derive(Default)] struct NoRecRule; impl PropertyRule for NoRecRule { - fn on_periodic(&mut self, access: &dyn TargetPropertyAccess, table: usize) -> Result<(), String> { - let table_plan = access - .schema_plan() - .tables - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - let Some((col_idx, col_ty)) = table_plan - .columns - .iter() - .enumerate() - .skip(1) - .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool | AlgebraicType::U64)) - .map(|(idx, col)| (idx as u16, &col.ty)) - else { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { return Ok(()); }; - - let scanned_rows = access.collect_rows_for_table(table)?; - if scanned_rows.is_empty() { - return Ok(()); - } - - let predicate_value = match col_ty { - AlgebraicType::Bool => AlgebraicValue::Bool(true), - AlgebraicType::U64 => scanned_rows[0].values[col_idx as usize].clone(), - _ => return Ok(()), - }; - let where_count = access.count_by_col_eq(table, col_idx, &predicate_value)?; - let projected_true_count = scanned_rows - .iter() - .filter(|row| row.values[col_idx as usize] == predicate_value) - .count(); - if where_count != projected_true_count { - return Err(format!( - "[NoREC::SelectSelectOptimizer] mismatch on table={table}, col={col_idx}: where_count={where_count}, projected_true={projected_true_count}" - )); + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some((col_idx, col_ty)) = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool | AlgebraicType::U64)) + .map(|(idx, col)| (idx as u16, &col.ty)) + else { + continue; + }; + let scanned_rows = 
ctx.access.collect_rows_for_table(table)?; + if scanned_rows.is_empty() { + continue; + } + let predicate_value = match col_ty { + AlgebraicType::Bool => AlgebraicValue::Bool(true), + AlgebraicType::U64 => scanned_rows[0].values[col_idx as usize].clone(), + _ => continue, + }; + let where_count = ctx.access.count_by_col_eq(table, col_idx, &predicate_value)?; + let projected_true_count = scanned_rows + .iter() + .filter(|row| row.values[col_idx as usize] == predicate_value) + .count(); + if where_count != projected_true_count { + return Err(format!( + "[NoREC::SelectSelectOptimizer] mismatch on table={table}, col={col_idx}: where_count={where_count}, projected_true={projected_true_count}" + )); + } } Ok(()) } @@ -390,30 +668,40 @@ impl PropertyRule for NoRecRule { struct TlpRule; impl PropertyRule for TlpRule { - fn on_periodic(&mut self, access: &dyn TargetPropertyAccess, table: usize) -> Result<(), String> { - let table_plan = access - .schema_plan() - .tables - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - let Some(col_idx) = table_plan - .columns - .iter() - .enumerate() - .skip(1) - .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool)) - .map(|(idx, _)| idx as u16) - else { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { return Ok(()); }; - let total = access.count_rows(table)?; - let true_count = access.count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(true))?; - let false_count = access.count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(false))?; - let partition_sum = true_count + false_count; - if partition_sum != total { - return Err(format!( - "[TLP::WhereTrueFalseNull|TLP::UNIONAllPreservesCardinality] partition mismatch on table={table}, col={col_idx}: true={true_count}, false={false_count}, total={total}" - )); + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + 
.get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some(col_idx) = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool)) + .map(|(idx, _)| idx as u16) + else { + continue; + }; + let total = ctx.access.count_rows(table)?; + let true_count = ctx + .access + .count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(true))?; + let false_count = ctx + .access + .count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(false))?; + let partition_sum = true_count + false_count; + if partition_sum != total { + return Err(format!( + "[TLP::WhereTrueFalseNull|TLP::UNIONAllPreservesCardinality] partition mismatch on table={table}, col={col_idx}: true={true_count}, false={false_count}, total={total}" + )); + } } Ok(()) } @@ -423,55 +711,61 @@ impl PropertyRule for TlpRule { struct IndexRangeExcludedRule; impl PropertyRule for IndexRangeExcludedRule { - fn on_periodic(&mut self, access: &dyn TargetPropertyAccess, table: usize) -> Result<(), String> { - const MAX_ROWS_FOR_INDEX_SCAN_CHECK: usize = 512; - - let table_plan = access - .schema_plan() - .tables - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - let rows = access.collect_rows_for_table(table)?; - if rows.len() < 2 || rows.len() > MAX_ROWS_FOR_INDEX_SCAN_CHECK { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { return Ok(()); - } + }; + const MAX_ROWS_FOR_INDEX_SCAN_CHECK: usize = 512; - for cols in table_plan.extra_indexes.iter().filter(|cols| cols.len() > 1) { - if !cols.iter().all(|&col| { - matches!( - table_plan.columns[col as usize].ty, - AlgebraicType::U64 | AlgebraicType::Bool - ) - }) { + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let rows = ctx.access.collect_rows_for_table(table)?; 
+ if rows.len() < 2 || rows.len() > MAX_ROWS_FOR_INDEX_SCAN_CHECK { continue; } - let mut sorted_rows = rows.clone(); - sorted_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - - let lower_key = sorted_rows[0].project_key(cols).to_algebraic_value(); - let upper_key = sorted_rows[sorted_rows.len() - 1] - .project_key(cols) - .to_algebraic_value(); - let lower = Bound::Included(lower_key.clone()); - let upper = Bound::Excluded(upper_key.clone()); - - let mut expected_rows = sorted_rows - .into_iter() - .filter(|row| { - let key = row.project_key(cols).to_algebraic_value(); - key >= lower_key && key < upper_key - }) - .collect::>(); - expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - - let mut actual_rows = access.range_scan(table, cols, lower, upper)?; - actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - - if actual_rows != expected_rows { - return Err(format!( - "[PQS::IndexRangeExcluded] range mismatch on table={table}, cols={cols:?}: expected={expected_rows:?}, actual={actual_rows:?}" - )); + for cols in table_plan.extra_indexes.iter().filter(|cols| cols.len() > 1) { + if !cols.iter().all(|&col| { + matches!( + table_plan.columns[col as usize].ty, + AlgebraicType::U64 | AlgebraicType::Bool + ) + }) { + continue; + } + + let mut sorted_rows = rows.clone(); + sorted_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let lower_key = sorted_rows[0].project_key(cols).to_algebraic_value(); + let upper_key = sorted_rows[sorted_rows.len() - 1] + .project_key(cols) + .to_algebraic_value(); + let lower = Bound::Included(lower_key.clone()); + let upper = Bound::Excluded(upper_key.clone()); + + let mut expected_rows = sorted_rows + .into_iter() + .filter(|row| { + let key = row.project_key(cols).to_algebraic_value(); + key >= lower_key && key < upper_key + }) + .collect::>(); + expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let mut actual_rows = 
ctx.access.range_scan(table, cols, lower, upper)?; + actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + if actual_rows != expected_rows { + return Err(format!( + "[PQS::IndexRangeExcluded] range mismatch on table={table}, cols={cols:?}: expected={expected_rows:?}, actual={actual_rows:?}" + )); + } } } @@ -483,38 +777,155 @@ impl PropertyRule for IndexRangeExcludedRule { struct BankingMatchRule; impl PropertyRule for BankingMatchRule { - fn on_insert( - &mut self, - access: &dyn TargetPropertyAccess, - _step: u64, - _conn: usize, - _table: usize, - _row: &SimRow, - in_tx: bool, - ) -> Result<(), String> { - if in_tx { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + match event { + PropertyEvent::RowInserted { in_tx: false, .. } + | PropertyEvent::RowDeleted { in_tx: false, .. } + | PropertyEvent::CommitOrRollback => check_banking_tables_match(ctx.access), + _ => Ok(()), + } + } +} + +#[derive(Default)] +struct DynamicMigrationAutoIncRule; + +impl PropertyRule for DynamicMigrationAutoIncRule { + fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::DynamicMigrationProbe(probe) = event else { return Ok(()); + }; + let max_existing_id = probe + .existing_rows + .iter() + .filter_map(sim_row_integer_id) + .max() + .unwrap_or(0); + let inserted_id = sim_row_integer_id(&probe.inserted_row).ok_or_else(|| { + format!( + "[DynamicMigrationAutoInc] probe row missing integer id for slot={}, from_version={}, to_version={}: {:?}", + probe.slot, probe.from_version, probe.to_version, probe.inserted_row + ) + })?; + if inserted_id <= max_existing_id { + return Err(format!( + "[DynamicMigrationAutoInc] non-advancing id for slot={}, from_version={}, to_version={}: inserted_id={}, max_existing_id={}", + probe.slot, probe.from_version, probe.to_version, inserted_id, max_existing_id + )); } - check_banking_tables_match(access) + Ok(()) } +} - 
fn on_delete( - &mut self, - access: &dyn TargetPropertyAccess, - _step: u64, - _conn: usize, - _table: usize, - _row: &SimRow, - in_tx: bool, - ) -> Result<(), String> { - if in_tx { +#[derive(Default)] +struct ExpectedErrorMatchesRule; + +impl PropertyRule for ExpectedErrorMatchesRule { + fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::ExpectedError { kind, interaction } = event else { + return Ok(()); + }; + if interaction.expected == ExpectedResult::Err(kind) { + Ok(()) + } else { + Err(format!( + "[ExpectedErrorMatches] observed {kind:?}, but interaction expected {:?}: {interaction:?}", + interaction.expected + )) + } + } +} + +#[derive(Default)] +struct PointLookupMatchesModelRule; + +impl PropertyRule for PointLookupMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::PointLookup { + conn, + table, + id, + actual, + } = event + else { return Ok(()); + }; + let expected = ctx.models.table().lookup_by_id(conn, table, id); + if *actual != expected { + return Err(format!( + "[Model::PointLookup] mismatch conn={conn}, table={table}, id={id}: expected={expected:?}, actual={actual:?}" + )); } - check_banking_tables_match(access) + Ok(()) } +} - fn on_commit_or_rollback(&mut self, access: &dyn TargetPropertyAccess) -> Result<(), String> { - check_banking_tables_match(access) +#[derive(Default)] +struct PredicateCountMatchesModelRule; + +impl PropertyRule for PredicateCountMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::PredicateCount { + conn, + table, + col, + value, + actual, + } = event + else { + return Ok(()); + }; + let expected = ctx.models.table().predicate_count(conn, table, col, value); + if actual != expected { + return Err(format!( + "[Model::PredicateCount] mismatch conn={conn}, table={table}, col={col}, 
value={value:?}: expected={expected}, actual={actual}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct RangeScanMatchesModelRule; + +impl PropertyRule for RangeScanMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + } = event + else { + return Ok(()); + }; + let expected = ctx.models.table().range_scan(conn, table, cols, lower, upper); + if actual != expected.as_slice() { + return Err(format!( + "[Model::RangeScan] mismatch conn={conn}, table={table}, cols={cols:?}, lower={lower:?}, upper={upper:?}: expected={expected:?}, actual={actual:?}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct FullScanMatchesModelRule; + +impl PropertyRule for FullScanMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::FullScan { conn, table, actual } = event else { + return Ok(()); + }; + let expected = ctx.models.table().full_scan(conn, table); + if actual != expected.as_slice() { + return Err(format!( + "[Model::FullScan] mismatch conn={conn}, table={table}: expected={expected:?}, actual={actual:?}" + )); + } + Ok(()) } } @@ -542,3 +953,11 @@ fn compare_rows_by_cols(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::O .cmp(&rhs.project_key(cols).to_algebraic_value()) .then_with(|| lhs.values.cmp(&rhs.values)) } + +fn sim_row_integer_id(row: &SimRow) -> Option { + match row.values.first() { + Some(AlgebraicValue::I64(value)) => Some(*value as i128), + Some(AlgebraicValue::U64(value)) => Some(*value as i128), + _ => None, + } +} diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index a6238aa6f34..87f50666ccb 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -1,31 +1,18 @@ //! 
RelationalDB DST target with mocked commitlog file chaos and replay checks. -use std::{ - collections::BTreeMap, - io, - ops::Bound, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, Mutex, - }, -}; +use std::{collections::BTreeMap, io, ops::Bound, panic::AssertUnwindSafe, sync::Arc}; -use spacetimedb_commitlog::{ - commitlog::Generic as GenericCommitlog, - error as commitlog_error, - payload::Txdata as CommitlogTxdata, - repo::{Memory as MemoryCommitlogRepo, SizeOnDisk}, - Decoder as CommitlogDecoder, Transaction as CommitlogTransaction, -}; +use spacetimedb_commitlog::repo::{Memory as MemoryCommitlogRepo, SizeOnDisk}; use spacetimedb_core::{ db::relational_db::{MutTx as RelMutTx, Persistence, RelationalDB}, + error::{DBError, DatastoreError, IndexError}, messages::control_db::HostType, }; use spacetimedb_datastore::{ execution_context::Workload, traits::{IsolationLevel, Program}, }; -use spacetimedb_durability::{Close, Durability, DurableOffset, EmptyHistory, History, PreparedTx, TxOffset}; +use spacetimedb_durability::{Durability, EmptyHistory, Local}; use spacetimedb_lib::{ db::auth::{StAccess, StTableType}, Identity, @@ -38,7 +25,6 @@ use spacetimedb_schema::{ table_name::TableName, }; use spacetimedb_table::page_pool::PagePool; -use tokio::sync::watch; use tracing::{debug, info, trace}; use crate::{ @@ -46,18 +32,19 @@ use crate::{ core::{self, TargetEngine}, schema::{SchemaPlan, SimRow}, seed::DstSeed, - targets::properties::{PropertyRuntime, TargetPropertyAccess}, + targets::properties::{DynamicMigrationProbe, PropertyRuntime, TargetPropertyAccess}, workload::{ commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome}, table_ops::{ - ConnectionWriteState, TableScenario, TableScenarioId, TableWorkloadInteraction, TableWorkloadOutcome, + ConnectionWriteState, ExpectedErrorKind, TableOperation, TableScenario, TableScenarioId, + TableWorkloadInteraction, TableWorkloadOutcome, }, }, }; pub type RelationalDbCommitlogOutcome = CommitlogWorkloadOutcome; -pub 
fn run_generated_with_config_and_scenario( +pub async fn run_generated_with_config_and_scenario( seed: DstSeed, scenario: TableScenarioId, config: RunConfig, @@ -74,7 +61,7 @@ pub fn run_generated_with_config_and_scenario( config.max_interactions_or_default(usize::MAX), ); let engine = RelationalDbEngine::new(seed, scenario, &schema, num_connections)?; - let outcome = core::run_streaming(generator, engine, config)?; + let outcome = core::run_streaming(generator, engine, config).await?; info!( applied_steps = outcome.applied_steps, durable_commit_count = outcome.durable_commit_count, @@ -194,7 +181,7 @@ impl RelationalDbEngine { .map_err(|err| format!("install base schema commit failed: {err}")) } - fn execute(&mut self, interaction: &CommitlogInteraction) -> Result<(), String> { + async fn execute(&mut self, interaction: &CommitlogInteraction) -> Result<(), String> { self.step = self.step.saturating_add(1); match interaction { CommitlogInteraction::Table(op) => self.execute_table_op(op), @@ -202,11 +189,11 @@ impl RelationalDbEngine { CommitlogInteraction::DropDynamicTable { conn, slot } => self.drop_dynamic_table(*conn, *slot), CommitlogInteraction::MigrateDynamicTable { conn, slot } => self.migrate_dynamic_table(*conn, *slot), CommitlogInteraction::ChaosSync => self.sync_and_snapshot(true), - CommitlogInteraction::CloseReopen => self.close_and_reopen(), + CommitlogInteraction::CloseReopen => self.close_and_reopen().await, } } - fn close_and_reopen(&mut self) -> Result<(), String> { + async fn close_and_reopen(&mut self) -> Result<(), String> { if self.execution.active_writer.is_some() || self.execution.tx_by_connection.iter().any(|tx| tx.is_some()) { trace!("skip close/reopen while transaction is open"); return Ok(()); @@ -219,12 +206,18 @@ impl RelationalDbEngine { .db .take() .ok_or_else(|| "close/reopen failed: relational db not initialized".to_string())?; - self.runtime_handle.block_on(old_db.shutdown()); + old_db.shutdown().await; drop(old_db); 
info!("starting in-memory durability"); - let durability = InMemoryCommitlogDurability::open(self.commitlog_repo.clone()) - .map_err(|err| format!("reopen in-memory durability failed: {err}"))?; + let durability = Arc::new( + InMemoryCommitlogDurability::open_with_repo( + self.commitlog_repo.clone(), + self.runtime_handle.clone(), + Default::default(), + ) + .map_err(|err| format!("reopen in-memory durability failed: {err}"))?, + ); let persistence = Persistence { durability: durability.clone(), disk_size: Arc::new(in_memory_size_on_disk), @@ -234,7 +227,7 @@ impl RelationalDbEngine { let (db, connected_clients) = RelationalDB::open( Identity::ZERO, Identity::ZERO, - durability.clone(), + durability.as_history(), Some(persistence), None, PagePool::new_for_test(), @@ -292,9 +285,19 @@ impl RelationalDbEngine { } fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), String> { + match std::panic::catch_unwind(AssertUnwindSafe(|| self.execute_table_op_inner(interaction))) { + Ok(result) => result, + Err(payload) => Err(format!( + "[DatastoreNeverPanics] interaction panicked: interaction={interaction:?}, payload={}", + panic_payload_to_string(&payload) + )), + } + } + + fn execute_table_op_inner(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), String> { trace!(step = self.step, ?interaction, "table interaction"); - let applied: Result<(), String> = match interaction { - TableWorkloadInteraction::BeginTx { conn } => { + let applied: Result<(), String> = match &interaction.op { + TableOperation::BeginTx { conn } => { self.execution.ensure_known_connection(*conn)?; if self.execution.tx_by_connection[*conn].is_some() { return Err(format!("connection {conn} already has open transaction")); @@ -311,7 +314,7 @@ impl RelationalDbEngine { self.execution.active_writer = Some(*conn); Ok(()) } - TableWorkloadInteraction::CommitTx { conn } => { + TableOperation::CommitTx { conn } => { self.execution.ensure_writer_owner(*conn, 
"commit")?; let tx = self.execution.tx_by_connection[*conn] .take() @@ -324,7 +327,7 @@ impl RelationalDbEngine { self.with_property_runtime(|runtime, access| runtime.on_commit_or_rollback(access))?; Ok(()) } - TableWorkloadInteraction::RollbackTx { conn } => { + TableOperation::RollbackTx { conn } => { self.execution.ensure_writer_owner(*conn, "rollback")?; let tx = self.execution.tx_by_connection[*conn] .take() @@ -335,7 +338,7 @@ impl RelationalDbEngine { self.with_property_runtime(|runtime, access| runtime.on_commit_or_rollback(access))?; Ok(()) } - TableWorkloadInteraction::Insert { conn, table, row } => { + TableOperation::Insert { conn, table, row } => { let in_tx = self.execution.tx_by_connection[*conn].is_some(); let inserted_row = self.with_mut_tx(*conn, |engine, tx| { let table_id = *engine @@ -358,7 +361,7 @@ impl RelationalDbEngine { })?; Ok(()) } - TableWorkloadInteraction::Delete { conn, table, row } => { + TableOperation::Delete { conn, table, row } => { let in_tx = self.execution.tx_by_connection[*conn].is_some(); self.with_mut_tx(*conn, |engine, tx| { let table_id = *engine @@ -380,6 +383,144 @@ impl RelationalDbEngine { })?; Ok(()) } + TableOperation::DuplicateInsert { conn, table, row } => { + let outcome = self.with_mut_tx(*conn, |engine, tx| { + let table_id = *engine + .base_table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + match engine.db()?.insert(tx, table_id, &bsatn) { + Ok(_) => Ok(Err("duplicate insert unexpectedly succeeded".to_string())), + Err(err) if is_unique_constraint_violation(&err) => Ok(Ok(())), + Err(err) => Ok(Err(format!( + "duplicate insert returned wrong error: expected={:?}, actual={err}", + ExpectedErrorKind::UniqueConstraintViolation + ))), + } + })?; + match outcome { + Ok(()) => self.with_property_runtime(|runtime, access| { + runtime.on_expected_error(access, ExpectedErrorKind::UniqueConstraintViolation, interaction) + 
}), + Err(err) => Err(format!("[ExpectedErrorMatches] {err}; interaction={interaction:?}")), + } + } + TableOperation::DeleteMissing { conn, table, row } => { + let deleted = self.with_mut_tx(*conn, |engine, tx| { + let table_id = *engine + .base_table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + Ok(engine.db()?.delete_by_rel(tx, table_id, [row.to_product_value()])) + })?; + if deleted == 0 { + self.with_property_runtime(|runtime, access| { + runtime.on_expected_error(access, ExpectedErrorKind::MissingRow, interaction) + }) + } else { + Err(format!( + "[ExpectedErrorDoesNotMutate] missing delete removed {deleted} rows; interaction={interaction:?}" + )) + } + } + TableOperation::BatchInsert { conn, table, rows } => { + let in_tx = self.execution.tx_by_connection[*conn].is_some(); + self.with_mut_tx(*conn, |engine, tx| { + let table_id = *engine + .base_table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + for row in rows { + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + engine + .db()? 
+ .insert(tx, table_id, &bsatn) + .map_err(|err| format!("batch insert failed: {err}"))?; + } + Ok(()) + })?; + if !in_tx { + self.sync_and_snapshot(false)?; + } + Ok(()) + } + TableOperation::BatchDelete { conn, table, rows } => { + let in_tx = self.execution.tx_by_connection[*conn].is_some(); + self.with_mut_tx(*conn, |engine, tx| { + let table_id = *engine + .base_table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + for row in rows { + let deleted = engine.db()?.delete_by_rel(tx, table_id, [row.to_product_value()]); + if deleted != 1 { + return Err(format!("batch delete expected 1 row, got {deleted} for row={row:?}")); + } + } + Ok(()) + })?; + if !in_tx { + self.sync_and_snapshot(false)?; + } + Ok(()) + } + TableOperation::Reinsert { conn, table, row } => { + let in_tx = self.execution.tx_by_connection[*conn].is_some(); + self.with_mut_tx(*conn, |engine, tx| { + let table_id = *engine + .base_table_ids + .get(*table) + .ok_or_else(|| format!("table {table} out of range"))?; + let deleted = engine.db()?.delete_by_rel(tx, table_id, [row.to_product_value()]); + if deleted != 1 { + return Err(format!("reinsert delete expected 1 row, got {deleted} for row={row:?}")); + } + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + engine + .db()? 
+ .insert(tx, table_id, &bsatn) + .map_err(|err| format!("reinsert insert failed: {err}"))?; + Ok(()) + })?; + if !in_tx { + self.sync_and_snapshot(false)?; + } + Ok(()) + } + TableOperation::PointLookup { conn, table, id } => { + let actual = self.lookup_base_row(*conn, *table, *id)?; + self.with_property_runtime(|runtime, access| { + runtime.on_point_lookup(access, *conn, *table, *id, &actual) + }) + } + TableOperation::PredicateCount { + conn, + table, + col, + value, + } => { + let actual = self.count_by_col_eq_in_connection(*conn, *table, *col, value)?; + self.with_property_runtime(|runtime, access| { + runtime.on_predicate_count(access, *conn, *table, *col, value, actual) + }) + } + TableOperation::RangeScan { + conn, + table, + cols, + lower, + upper, + } => { + let actual = self.range_scan_in_connection(*conn, *table, cols, lower.clone(), upper.clone())?; + self.with_property_runtime(|runtime, access| { + runtime.on_range_scan(access, *conn, *table, cols, lower, upper, &actual) + }) + } + TableOperation::FullScan { conn, table } => { + let actual = self.collect_rows_in_connection(*conn, *table)?; + self.with_property_runtime(|runtime, access| runtime.on_full_scan(access, *conn, *table, &actual)) + } }; applied?; self.with_property_runtime(|runtime, access| runtime.on_table_interaction(access, interaction)) @@ -498,9 +639,9 @@ impl RelationalDbEngine { } let conn = self.normalize_conn(conn); debug!(step = self.step, conn, slot, "migrate dynamic table"); - self.with_mut_tx(conn, |engine, tx| { + let probe = self.with_mut_tx(conn, |engine, tx| { let Some(state) = engine.dynamic_tables.get(&slot).cloned() else { - return Ok(()); + return Ok(None); }; let to_version = state.version.saturating_add(1); let new_table_id = engine @@ -519,14 +660,6 @@ impl RelationalDbEngine { .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .collect::>(); - // Sequence regression probe: - // after add-columns migration, force one auto-inc insert. 
- // If sequence state was reset by migration, this can collide with existing ids. - let max_existing_id = existing_rows - .iter() - .filter_map(sim_row_integer_id) - .max() - .unwrap_or(0); let probe_row = dynamic_probe_row(slot, to_version); let bsatn = probe_row.to_bsatn().map_err(|err| err.to_string())?; let (_, inserted_ref, _) = engine @@ -534,13 +667,6 @@ impl RelationalDbEngine { .insert(tx, new_table_id, &bsatn) .map_err(|err| format!("migrate auto-inc probe failed for slot={slot}: {err}"))?; let inserted = SimRow::from_product_value(inserted_ref.to_product_value()); - let inserted_id = sim_row_integer_id(&inserted) - .ok_or_else(|| format!("migrate probe row missing id: {inserted:?}"))?; - if inserted_id <= max_existing_id { - return Err(format!( - "migrate auto-inc probe produced non-advancing id for slot={slot}: inserted_id={inserted_id}, max_existing_id={max_existing_id}" - )); - } engine.dynamic_tables.insert( slot, DynamicTableState { @@ -549,8 +675,17 @@ impl RelationalDbEngine { table_id: new_table_id, }, ); - Ok(()) + Ok(Some(DynamicMigrationProbe { + slot, + from_version: state.version, + to_version, + existing_rows, + inserted_row: inserted, + })) })?; + if let Some(probe) = probe { + self.with_property_runtime(|runtime, access| runtime.on_dynamic_migration_probe(access, &probe))?; + } self.sync_and_snapshot(false) } @@ -606,6 +741,73 @@ impl RelationalDbEngine { } } + fn collect_rows_in_connection(&self, conn: usize, table: usize) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { + let mut rows = self + .db()? + .iter_mut(tx, table_id) + .map_err(|err| format!("in-tx scan failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + } else { + self.collect_rows_by_id(table_id) + } + } + + fn count_by_col_eq_in_connection( + &self, + conn: usize, + table: usize, + col: u16, + value: &AlgebraicValue, + ) -> Result { + let table_id = self.table_id_for_index(table)?; + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { + Ok(self + .db()? + .iter_by_col_eq_mut(tx, table_id, col, value) + .map_err(|err| format!("in-tx predicate query failed: {err}"))? + .count()) + } else { + self.count_by_col_eq_for_property(table, col, value) + } + } + + fn range_scan_in_connection( + &self, + conn: usize, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + let col_list = cols.iter().copied().collect::(); + let mut rows = if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { + self.db()? + .iter_by_col_range_mut(tx, table_id, col_list, (lower, upper)) + .map_err(|err| format!("in-tx range scan failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>() + } else { + let db = self.db()?; + let tx = db.begin_tx(Workload::ForTests); + let rows = self + .db()? + .iter_by_col_range(&tx, table_id, col_list, (lower, upper)) + .map_err(|err| format!("range scan failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + let _ = db.release_tx(tx); + rows + }; + rows.sort_by(|lhs, rhs| compare_rows_for_range(lhs, rhs, cols)); + Ok(rows) + } + fn count_rows_for_property(&self, table: usize) -> Result { let table_id = self.table_id_for_index(table)?; let db = self.db()?; @@ -784,8 +986,8 @@ impl TargetEngine for RelationalDbEngine { type Outcome = RelationalDbCommitlogOutcome; type Error = String; - fn execute_interaction(&mut self, interaction: &CommitlogInteraction) -> Result<(), Self::Error> { - self.execute(interaction) + async fn execute_interaction(&mut self, interaction: &CommitlogInteraction) -> Result<(), Self::Error> { + self.execute(interaction).await } fn finish(&mut self) { @@ -797,104 +999,7 @@ impl TargetEngine for RelationalDbEngine { } } -type RelationalTxData = CommitlogTxdata; - -struct InMemoryCommitlogDurability { - log: Mutex>, - durable_tx: watch::Sender>, - durable_rx: watch::Receiver>, - closed: AtomicBool, -} - -impl InMemoryCommitlogDurability { - fn open(repo: MemoryCommitlogRepo) -> io::Result> { - let log = GenericCommitlog::open(repo, Default::default())?; - let durable_offset = log.max_committed_offset(); - let (durable_tx, durable_rx) = watch::channel(durable_offset); - Ok(Arc::new(Self { - log: Mutex::new(log), - durable_tx, - durable_rx, - closed: AtomicBool::new(false), - })) - } - - fn flush_and_sync(&self) -> Option { - let mut log = self.log.lock().expect("in-memory commitlog poisoned"); - log.flush().expect("in-memory commitlog flush failed"); - log.sync(); - let durable_offset = log.max_committed_offset(); - let _ = self.durable_tx.send(durable_offset); - durable_offset - } -} - -impl Durability for InMemoryCommitlogDurability { - type TxData = RelationalTxData; - - fn append_tx(&self, tx: PreparedTx) { - if self.closed.load(Ordering::SeqCst) { - panic!("in-memory durability is closed"); - } - let mut log = self.log.lock().expect("in-memory commitlog 
poisoned"); - log.commit([tx.into_transaction()]) - .expect("in-memory commitlog commit failed"); - log.flush().expect("in-memory commitlog flush failed"); - log.sync(); - let durable_offset = log.max_committed_offset(); - let _ = self.durable_tx.send(durable_offset); - } - - fn durable_tx_offset(&self) -> DurableOffset { - self.durable_rx.clone().into() - } - - fn close(&self) -> Close { - self.closed.store(true, Ordering::SeqCst); - let durable_offset = self.flush_and_sync(); - Box::pin(async move { durable_offset }) - } -} - -impl History for InMemoryCommitlogDurability { - type TxData = RelationalTxData; - - fn fold_transactions_from(&self, offset: TxOffset, decoder: D) -> Result<(), D::Error> - where - D: CommitlogDecoder, - D::Error: From, - { - self.log - .lock() - .expect("in-memory commitlog poisoned") - .fold_transactions_from(offset, decoder) - } - - fn transactions_from<'a, D>( - &self, - offset: TxOffset, - decoder: &'a D, - ) -> impl Iterator, D::Error>> - where - D: CommitlogDecoder, - D::Error: From, - Self::TxData: 'a, - { - self.log - .lock() - .expect("in-memory commitlog poisoned") - .transactions_from(offset, decoder) - .collect::>() - .into_iter() - } - - fn tx_range_hint(&self) -> (TxOffset, Option) { - let log = self.log.lock().expect("in-memory commitlog poisoned"); - let min = log.min_committed_offset().unwrap_or_default(); - let max = log.max_committed_offset(); - (min, max) - } -} +type InMemoryCommitlogDurability = Local; fn bootstrap_relational_db( _seed: DstSeed, @@ -912,8 +1017,10 @@ fn bootstrap_relational_db( (runtime.handle().clone(), Some(runtime)) }; let commitlog_repo = MemoryCommitlogRepo::unlimited(); - let durability = InMemoryCommitlogDurability::open(commitlog_repo.clone()) - .map_err(|err| anyhow::anyhow!("open in-memory durability failed: {err}"))?; + let durability = Arc::new( + InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), runtime_handle.clone(), Default::default()) + .map_err(|err| 
anyhow::anyhow!("open in-memory durability failed: {err}"))?, + ); let persistence = Persistence { durability: durability.clone(), disk_size: Arc::new(in_memory_size_on_disk), @@ -939,6 +1046,30 @@ fn in_memory_size_on_disk() -> io::Result { Ok(SizeOnDisk::default()) } +fn is_unique_constraint_violation(err: &DBError) -> bool { + matches!( + err, + DBError::Datastore(DatastoreError::Index(IndexError::UniqueConstraintViolation(_))) + ) +} + +fn panic_payload_to_string(payload: &Box) -> String { + if let Some(message) = payload.downcast_ref::() { + message.clone() + } else if let Some(message) = payload.downcast_ref::<&'static str>() { + (*message).to_string() + } else { + "".to_string() + } +} + +fn compare_rows_for_range(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) +} + fn dynamic_table_name(slot: u32) -> String { format!("dst_dynamic_slot_{slot}") } @@ -996,11 +1127,3 @@ fn dynamic_schema(name: &str, version: u32) -> TableSchema { None, ) } - -fn sim_row_integer_id(row: &SimRow) -> Option { - match row.values.first() { - Some(AlgebraicValue::I64(value)) => Some(*value as i128), - Some(AlgebraicValue::U64(value)) => Some(*value as i128), - _ => None, - } -} diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index 39f198c531b..d3ebb5badd9 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -2,7 +2,7 @@ use std::collections::VecDeque; use crate::{ core::NextInteractionSource, - schema::SchemaPlan, + schema::{SchemaPlan, TablePlan}, seed::{DstRng, DstSeed}, workload::strategy::{Index, Percent, Strategy}, }; @@ -79,17 +79,17 @@ impl<'a> ScenarioPlanner<'a> { { TxControlAction::Begin if !self.model.connections[conn].in_tx && self.model.active_writer().is_none() => { 
self.model.begin_tx(conn); - self.pending.push_back(TableWorkloadInteraction::BeginTx { conn }); + self.pending.push_back(TableWorkloadInteraction::begin_tx(conn)); true } TxControlAction::Commit if self.model.connections[conn].in_tx => { self.model.commit(conn); - self.pending.push_back(TableWorkloadInteraction::CommitTx { conn }); + self.pending.push_back(TableWorkloadInteraction::commit_tx(conn)); true } TxControlAction::Rollback if self.model.connections[conn].in_tx => { self.model.rollback(conn); - self.pending.push_back(TableWorkloadInteraction::RollbackTx { conn }); + self.pending.push_back(TableWorkloadInteraction::rollback_tx(conn)); true } _ => false, @@ -100,6 +100,10 @@ impl<'a> ScenarioPlanner<'a> { self.model.visible_rows(conn, table) } + pub fn table_plan(&self, table: usize) -> &TablePlan { + &self.model.schema.tables[table] + } + pub fn make_row(&mut self, table: usize) -> crate::schema::SimRow { self.model.make_row(self.rng, table) } @@ -108,10 +112,27 @@ impl<'a> ScenarioPlanner<'a> { self.model.insert(conn, table, row); } + pub fn batch_insert(&mut self, conn: usize, table: usize, rows: &[crate::schema::SimRow]) { + self.model.batch_insert(conn, table, rows); + } + pub fn delete(&mut self, conn: usize, table: usize, row: crate::schema::SimRow) { self.model.delete(conn, table, row); } + pub fn batch_delete(&mut self, conn: usize, table: usize, rows: &[crate::schema::SimRow]) { + self.model.batch_delete(conn, table, rows); + } + + pub fn reinsert(&mut self, conn: usize, table: usize, row: crate::schema::SimRow) { + self.model.delete(conn, table, row.clone()); + self.model.insert(conn, table, row); + } + + pub fn absent_row(&mut self, conn: usize, table: usize) -> crate::schema::SimRow { + self.model.absent_row(self.rng, conn, table) + } + pub fn push_interaction(&mut self, interaction: TableWorkloadInteraction) { self.pending.push_back(interaction); } @@ -151,7 +172,7 @@ impl NextInteractionGenerator { self.finalize_conn += 1; if 
self.model.connections[conn].in_tx { self.model.commit(conn); - self.pending.push_back(TableWorkloadInteraction::CommitTx { conn }); + self.pending.push_back(TableWorkloadInteraction::commit_tx(conn)); return; } } diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs index 69f349ffc4a..21ff91c0743 100644 --- a/crates/dst/src/workload/table_ops/mod.rs +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -10,4 +10,4 @@ pub(crate) use generation::NextInteractionGenerator; pub(crate) use model::ExpectedModel; pub use scenarios::TableScenarioId; pub(crate) use types::{ConnectionWriteState, TableScenario}; -pub use types::{TableWorkloadInteraction, TableWorkloadOutcome}; +pub use types::{ExpectedErrorKind, ExpectedResult, TableOperation, TableWorkloadInteraction, TableWorkloadOutcome}; diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs index 4a6b5d3b3c4..885a8673f2b 100644 --- a/crates/dst/src/workload/table_ops/model.rs +++ b/crates/dst/src/workload/table_ops/model.rs @@ -1,3 +1,5 @@ +use std::ops::Bound; + use spacetimedb_sats::AlgebraicValue; use crate::{ @@ -5,7 +7,7 @@ use crate::{ seed::{DstRng, DstSeed}, }; -use super::TableWorkloadInteraction; +use super::{ExpectedResult, TableOperation, TableWorkloadInteraction}; /// Generator-side model of committed rows plus per-connection pending writes. 
/// @@ -68,6 +70,14 @@ impl GenerationModel { rows } + pub(crate) fn absent_row(&mut self, rng: &mut DstRng, conn: usize, table: usize) -> SimRow { + let mut row = self.make_row(rng, table); + while self.visible_rows(conn, table).iter().any(|candidate| candidate == &row) { + row = self.make_row(rng, table); + } + row + } + pub(crate) fn active_writer(&self) -> Option { self.active_writer } @@ -89,6 +99,12 @@ impl GenerationModel { } } + pub(crate) fn batch_insert(&mut self, conn: usize, table: usize, rows: &[SimRow]) { + for row in rows { + self.insert(conn, table, row.clone()); + } + } + pub(crate) fn delete(&mut self, conn: usize, table: usize, row: SimRow) { let pending = &mut self.connections[conn]; if pending.in_tx { @@ -101,6 +117,12 @@ impl GenerationModel { } } + pub(crate) fn batch_delete(&mut self, conn: usize, table: usize, rows: &[SimRow]) { + for row in rows { + self.delete(conn, table, row.clone()); + } + } + pub(crate) fn commit(&mut self, conn: usize) { let pending = &mut self.connections[conn]; let inserts = std::mem::take(&mut pending.staged_inserts); @@ -154,8 +176,11 @@ impl ExpectedModel { } pub fn apply(&mut self, interaction: &TableWorkloadInteraction) { - match interaction { - TableWorkloadInteraction::BeginTx { conn } => { + if !matches!(interaction.expected, ExpectedResult::Ok) { + return; + } + match &interaction.op { + TableOperation::BeginTx { conn } => { assert!( self.active_writer.is_none(), "multiple concurrent writers in expected model" @@ -163,7 +188,7 @@ impl ExpectedModel { self.connections[*conn].in_tx = true; self.active_writer = Some(*conn); } - TableWorkloadInteraction::CommitTx { conn } => { + TableOperation::CommitTx { conn } => { assert_eq!(self.active_writer, Some(*conn), "commit by non-owner in expected model"); let state = &mut self.connections[*conn]; for (table, row) in state.staged_deletes.drain(..) 
{ @@ -175,7 +200,7 @@ impl ExpectedModel { state.in_tx = false; self.active_writer = None; } - TableWorkloadInteraction::RollbackTx { conn } => { + TableOperation::RollbackTx { conn } => { assert_eq!( self.active_writer, Some(*conn), @@ -187,26 +212,87 @@ impl ExpectedModel { state.in_tx = false; self.active_writer = None; } - TableWorkloadInteraction::Insert { conn, table, row } => { - let state = &mut self.connections[*conn]; - if state.in_tx { - state.staged_inserts.push((*table, row.clone())); - } else { - self.committed[*table].push(row.clone()); + TableOperation::Insert { conn, table, row } => { + self.insert(*conn, *table, row.clone()); + } + TableOperation::Delete { conn, table, row } => { + self.delete(*conn, *table, row.clone()); + } + TableOperation::BatchInsert { conn, table, rows } => { + for row in rows { + self.insert(*conn, *table, row.clone()); } } - TableWorkloadInteraction::Delete { conn, table, row } => { - let state = &mut self.connections[*conn]; - if state.in_tx { - state - .staged_inserts - .retain(|(pending_table, candidate)| !(*pending_table == *table && *candidate == *row)); - state.staged_deletes.push((*table, row.clone())); - } else { - self.committed[*table].retain(|candidate| *candidate != *row); + TableOperation::BatchDelete { conn, table, rows } => { + for row in rows { + self.delete(*conn, *table, row.clone()); } } + TableOperation::Reinsert { conn, table, row } => { + self.delete(*conn, *table, row.clone()); + self.insert(*conn, *table, row.clone()); + } + TableOperation::DuplicateInsert { .. } + | TableOperation::DeleteMissing { .. } + | TableOperation::PointLookup { .. } + | TableOperation::PredicateCount { .. } + | TableOperation::RangeScan { .. } + | TableOperation::FullScan { .. 
} => {} + } + } + + pub fn visible_rows(&self, conn: usize, table: usize) -> Vec { + let mut rows = self.committed[table].clone(); + let pending = &self.connections[conn]; + for (pending_table, row) in &pending.staged_deletes { + if *pending_table == table { + rows.retain(|candidate| candidate != row); + } + } + for (pending_table, row) in &pending.staged_inserts { + if *pending_table == table { + rows.push(row.clone()); + } } + rows + } + + pub fn lookup_by_id(&self, conn: usize, table: usize, id: u64) -> Option { + self.visible_rows(conn, table) + .into_iter() + .find(|row| row.id() == Some(id)) + } + + pub fn predicate_count(&self, conn: usize, table: usize, col: u16, value: &AlgebraicValue) -> usize { + self.visible_rows(conn, table) + .into_iter() + .filter(|row| row.values.get(col as usize) == Some(value)) + .count() + } + + pub fn range_scan( + &self, + conn: usize, + table: usize, + cols: &[u16], + lower: &Bound, + upper: &Bound, + ) -> Vec { + let mut rows = self + .visible_rows(conn, table) + .into_iter() + .filter(|row| { + let key = row.project_key(cols).to_algebraic_value(); + bound_contains_lower(lower, &key) && bound_contains_upper(upper, &key) + }) + .collect::>(); + rows.sort_by(|lhs, rhs| { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) + }); + rows } pub fn committed_rows(mut self) -> Vec> { @@ -215,4 +301,41 @@ impl ExpectedModel { } self.committed } + + fn insert(&mut self, conn: usize, table: usize, row: SimRow) { + let state = &mut self.connections[conn]; + if state.in_tx { + state.staged_inserts.push((table, row)); + } else { + self.committed[table].push(row); + } + } + + fn delete(&mut self, conn: usize, table: usize, row: SimRow) { + let state = &mut self.connections[conn]; + if state.in_tx { + state + .staged_inserts + .retain(|(pending_table, candidate)| !(*pending_table == table && *candidate == row)); + state.staged_deletes.push((table, 
row)); + } else { + self.committed[table].retain(|candidate| *candidate != row); + } + } +} + +fn bound_contains_lower(bound: &Bound, key: &AlgebraicValue) -> bool { + match bound { + Bound::Included(value) => key >= value, + Bound::Excluded(value) => key > value, + Bound::Unbounded => true, + } +} + +fn bound_contains_upper(bound: &Bound, key: &AlgebraicValue) -> bool { + match bound { + Bound::Included(value) => key <= value, + Bound::Excluded(value) => key < value, + Bound::Unbounded => true, + } } diff --git a/crates/dst/src/workload/table_ops/scenarios/banking.rs b/crates/dst/src/workload/table_ops/scenarios/banking.rs index 9039fa3738f..31f56735110 100644 --- a/crates/dst/src/workload/table_ops/scenarios/banking.rs +++ b/crates/dst/src/workload/table_ops/scenarios/banking.rs @@ -78,16 +78,8 @@ pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: usize) { let mirror = row.clone(); planner.insert(conn, 0, row.clone()); planner.insert(conn, 1, mirror.clone()); - planner.push_interaction(TableWorkloadInteraction::Insert { - conn, - table: 0, - row: row.clone(), - }); - planner.push_interaction(TableWorkloadInteraction::Insert { - conn, - table: 1, - row: mirror.clone(), - }); + planner.push_interaction(TableWorkloadInteraction::insert(conn, 0, row.clone())); + planner.push_interaction(TableWorkloadInteraction::insert(conn, 1, mirror.clone())); return; } @@ -95,14 +87,6 @@ pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: usize) { let mirror = row.clone(); planner.delete(conn, 0, row.clone()); planner.delete(conn, 1, mirror.clone()); - planner.push_interaction(TableWorkloadInteraction::Delete { - conn, - table: 0, - row: row.clone(), - }); - planner.push_interaction(TableWorkloadInteraction::Delete { - conn, - table: 1, - row: mirror.clone(), - }); + planner.push_interaction(TableWorkloadInteraction::delete(conn, 0, row.clone())); + planner.push_interaction(TableWorkloadInteraction::delete(conn, 1, mirror.clone())); } diff --git 
a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs index 4b103584f12..a1471d41703 100644 --- a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -1,3 +1,5 @@ +use std::ops::Bound; + use spacetimedb_sats::AlgebraicType; use crate::{ @@ -142,17 +144,138 @@ fn fill_pending_with_tuning(planner: &mut ScenarioPlanner<'_>, conn: usize, tuni let table = planner.choose_table(); let visible_rows = planner.visible_rows(conn, table); + if emit_query(planner, conn, table, &visible_rows) { + return; + } + if planner.roll_percent(5) { + let row = planner.absent_row(conn, table); + planner.push_interaction(TableWorkloadInteraction::delete_missing(conn, table, row)); + return; + } let choose_insert = visible_rows.is_empty() || planner.roll_percent(tuning.insert_pct); if choose_insert { + if planner.roll_percent(10) { + let count = 2 + planner.choose_index(3); + let rows = (0..count).map(|_| planner.make_row(table)).collect::>(); + planner.batch_insert(conn, table, &rows); + planner.push_interaction(TableWorkloadInteraction::batch_insert(conn, table, rows)); + return; + } let row = planner.make_row(table); planner.insert(conn, table, row.clone()); - planner.push_interaction(TableWorkloadInteraction::Insert { conn, table, row }); + planner.push_interaction(TableWorkloadInteraction::insert(conn, table, row)); + return; + } + + if visible_rows.len() >= 2 && planner.roll_percent(10) { + let count = 2 + planner.choose_index(visible_rows.len().min(3) - 1); + let mut candidates = visible_rows.clone(); + let mut rows = Vec::with_capacity(count); + for _ in 0..count { + let idx = planner.choose_index(candidates.len()); + rows.push(candidates.remove(idx)); + } + planner.batch_delete(conn, table, &rows); + planner.push_interaction(TableWorkloadInteraction::batch_delete(conn, table, rows)); + return; + } + if planner.roll_percent(6) { + let row = 
visible_rows[planner.choose_index(visible_rows.len())].clone(); + planner.reinsert(conn, table, row.clone()); + planner.push_interaction(TableWorkloadInteraction::reinsert(conn, table, row)); return; } let row = visible_rows[planner.choose_index(visible_rows.len())].clone(); planner.delete(conn, table, row.clone()); - planner.push_interaction(TableWorkloadInteraction::Delete { conn, table, row }); + planner.push_interaction(TableWorkloadInteraction::delete(conn, table, row)); +} + +fn emit_query( + planner: &mut ScenarioPlanner<'_>, + conn: usize, + table: usize, + visible_rows: &[crate::schema::SimRow], +) -> bool { + if !planner.roll_percent(25) { + return false; + } + if visible_rows.is_empty() { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + return true; + } + + match planner.choose_index(4) { + 0 => { + let row = &visible_rows[planner.choose_index(visible_rows.len())]; + if let Some(id) = row.id() { + planner.push_interaction(TableWorkloadInteraction::point_lookup(conn, table, id)); + true + } else { + false + } + } + 1 => { + let col = choose_predicate_col(planner, table); + let row = &visible_rows[planner.choose_index(visible_rows.len())]; + if let Some(value) = row.values.get(col as usize).cloned() { + planner.push_interaction(TableWorkloadInteraction::predicate_count(conn, table, col, value)); + true + } else { + false + } + } + 2 => { + let extra_indexes = planner.table_plan(table).extra_indexes.clone(); + let Some(cols) = extra_indexes + .into_iter() + .find(|cols| range_cols_supported(planner, table, cols)) + else { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + return true; + }; + let mut rows = visible_rows.to_vec(); + rows.sort_by(|lhs, rhs| { + lhs.project_key(&cols) + .to_algebraic_value() + .cmp(&rhs.project_key(&cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) + }); + let lower = rows[0].project_key(&cols).to_algebraic_value(); + let upper = rows[rows.len() 
- 1].project_key(&cols).to_algebraic_value(); + planner.push_interaction(TableWorkloadInteraction::range_scan( + conn, + table, + cols, + Bound::Included(lower), + Bound::Included(upper), + )); + true + } + _ => { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + true + } + } +} + +fn choose_predicate_col(planner: &mut ScenarioPlanner<'_>, table: usize) -> u16 { + let column_count = planner.table_plan(table).columns.len(); + if column_count <= 1 { + 0 + } else { + 1 + planner.choose_index(column_count - 1) as u16 + } +} + +fn range_cols_supported(planner: &ScenarioPlanner<'_>, table: usize, cols: &[u16]) -> bool { + cols.iter().all(|col| { + planner + .table_plan(table) + .columns + .get(*col as usize) + .is_some_and(|column| is_range_compatible(&column.ty)) + }) } fn is_range_compatible(ty: &AlgebraicType) -> bool { diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index e036da63819..f308639cfb1 100644 --- a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -1,3 +1,7 @@ +use std::ops::Bound; + +use spacetimedb_sats::AlgebraicValue; + use crate::{ schema::{SchemaPlan, SimRow}, seed::DstRng, @@ -17,12 +21,188 @@ pub(crate) trait TableScenario: Clone { /// One generated workload step. 
#[derive(Clone, Debug, Eq, PartialEq)] -pub enum TableWorkloadInteraction { - BeginTx { conn: usize }, - CommitTx { conn: usize }, - RollbackTx { conn: usize }, - Insert { conn: usize, table: usize, row: SimRow }, - Delete { conn: usize, table: usize, row: SimRow }, +pub struct PlannedInteraction { + pub op: TableOperation, + pub expected: ExpectedResult, +} + +pub type TableWorkloadInteraction = PlannedInteraction; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum TableOperation { + BeginTx { + conn: usize, + }, + CommitTx { + conn: usize, + }, + RollbackTx { + conn: usize, + }, + Insert { + conn: usize, + table: usize, + row: SimRow, + }, + Delete { + conn: usize, + table: usize, + row: SimRow, + }, + DuplicateInsert { + conn: usize, + table: usize, + row: SimRow, + }, + DeleteMissing { + conn: usize, + table: usize, + row: SimRow, + }, + BatchInsert { + conn: usize, + table: usize, + rows: Vec, + }, + BatchDelete { + conn: usize, + table: usize, + rows: Vec, + }, + Reinsert { + conn: usize, + table: usize, + row: SimRow, + }, + PointLookup { + conn: usize, + table: usize, + id: u64, + }, + PredicateCount { + conn: usize, + table: usize, + col: u16, + value: AlgebraicValue, + }, + RangeScan { + conn: usize, + table: usize, + cols: Vec, + lower: Bound, + upper: Bound, + }, + FullScan { + conn: usize, + table: usize, + }, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum ExpectedResult { + Ok, + Err(ExpectedErrorKind), +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum ExpectedErrorKind { + UniqueConstraintViolation, + MissingRow, +} + +impl PlannedInteraction { + pub fn ok(op: TableOperation) -> Self { + Self { + op, + expected: ExpectedResult::Ok, + } + } + + pub fn expected_err(op: TableOperation, kind: ExpectedErrorKind) -> Self { + Self { + op, + expected: ExpectedResult::Err(kind), + } + } + + pub fn begin_tx(conn: usize) -> Self { + Self::ok(TableOperation::BeginTx { conn }) + } + + pub fn commit_tx(conn: usize) -> Self { + 
Self::ok(TableOperation::CommitTx { conn }) + } + + pub fn rollback_tx(conn: usize) -> Self { + Self::ok(TableOperation::RollbackTx { conn }) + } + + pub fn insert(conn: usize, table: usize, row: SimRow) -> Self { + Self::ok(TableOperation::Insert { conn, table, row }) + } + + pub fn delete(conn: usize, table: usize, row: SimRow) -> Self { + Self::ok(TableOperation::Delete { conn, table, row }) + } + + pub fn duplicate_insert(conn: usize, table: usize, row: SimRow) -> Self { + Self::expected_err( + TableOperation::DuplicateInsert { conn, table, row }, + ExpectedErrorKind::UniqueConstraintViolation, + ) + } + + pub fn delete_missing(conn: usize, table: usize, row: SimRow) -> Self { + Self::expected_err( + TableOperation::DeleteMissing { conn, table, row }, + ExpectedErrorKind::MissingRow, + ) + } + + pub fn batch_insert(conn: usize, table: usize, rows: Vec) -> Self { + Self::ok(TableOperation::BatchInsert { conn, table, rows }) + } + + pub fn batch_delete(conn: usize, table: usize, rows: Vec) -> Self { + Self::ok(TableOperation::BatchDelete { conn, table, rows }) + } + + pub fn reinsert(conn: usize, table: usize, row: SimRow) -> Self { + Self::ok(TableOperation::Reinsert { conn, table, row }) + } + + pub fn point_lookup(conn: usize, table: usize, id: u64) -> Self { + Self::ok(TableOperation::PointLookup { conn, table, id }) + } + + pub fn predicate_count(conn: usize, table: usize, col: u16, value: AlgebraicValue) -> Self { + Self::ok(TableOperation::PredicateCount { + conn, + table, + col, + value, + }) + } + + pub fn range_scan( + conn: usize, + table: usize, + cols: Vec, + lower: Bound, + upper: Bound, + ) -> Self { + Self::ok(TableOperation::RangeScan { + conn, + table, + cols, + lower, + upper, + }) + } + + pub fn full_scan(conn: usize, table: usize) -> Self { + Self::ok(TableOperation::FullScan { conn, table }) + } } /// Final state gathered from a table-workload engine after execution ends. 
diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 3bf1921e8a8..c3a36f8694f 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -11,7 +11,12 @@ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, trace, warn}; use scopeguard::ScopeGuard; -use spacetimedb_commitlog::{error, payload::Txdata, Commit, Commitlog, Decoder, Encode, Transaction}; +use spacetimedb_commitlog::{ + error, + payload::Txdata, + repo::{Fs, Repo, RepoWithoutLockFile}, + Commit, Commitlog, Decoder, Encode, Transaction, +}; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; use thiserror::Error; @@ -83,9 +88,12 @@ pub enum OpenError { /// /// Note, however, that instantiating `T` to a different type may require to /// change the log format version! -pub struct Local { +pub struct Local +where + R: Repo, +{ /// The [`Commitlog`] this [`Durability`] and [`History`] impl wraps. - clog: Arc>>, + clog: Arc, R>>, /// The durable transaction offset, as reported by the background /// [`FlushAndSyncTask`]. durable_offset: watch::Receiver>, @@ -106,7 +114,7 @@ pub struct Local { actor: Mutex>>, } -impl Local { +impl Local { /// Create a [`Local`] instance at the `replica_dir`. /// /// `replica_dir` must already exist. 
@@ -132,6 +140,21 @@ impl Local { opts.commitlog, on_new_segment, )?); + Self::open_inner(clog, rt, opts, Some(lock)) + } +} + +impl Local +where + T: Encode + Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ + fn open_inner( + clog: Arc, R>>, + rt: tokio::runtime::Handle, + opts: Options, + lock: Option, + ) -> Result { let queue_capacity = opts.queue_capacity(); let (queue, txdata_rx) = async_channel::bounded(queue_capacity); let queue_depth = Arc::new(AtomicU64::new(0)); @@ -146,7 +169,7 @@ impl Local { batch_capacity: opts.batch_capacity, - lock, + _lock: lock, } .run(txdata_rx), ); @@ -161,12 +184,29 @@ impl Local { } /// Obtain a read-only copy of the durable state that implements [History]. - pub fn as_history(&self) -> impl History> + use { + pub fn as_history(&self) -> impl History> + use { self.clog.clone() } } -impl Local { +impl Local +where + T: Encode + Send + Sync + 'static, + R: RepoWithoutLockFile + Send + Sync + 'static, +{ + /// Create a [`Local`] instance backed by the provided commitlog repo. + pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { + info!("open local durability"); + let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); + Self::open_inner(clog, rt, opts, None) + } +} + +impl Local +where + T: Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ /// Inspect how many transactions added via [`Self::append_tx`] are pending /// to be applied to the underlying [`Commitlog`]. pub fn queue_depth(&self) -> u64 { @@ -174,7 +214,7 @@ impl Local { } /// Obtain an iterator over the [`Commit`]s in the underlying log. 
- pub fn commits_from(&self, offset: TxOffset) -> impl Iterator> + use { + pub fn commits_from(&self, offset: TxOffset) -> impl Iterator> + use { self.clog.commits_from(offset).map_ok(Commit::from) } @@ -187,26 +227,34 @@ impl Local { pub fn compress_segments(&self, offsets: &[TxOffset]) -> io::Result<()> { self.clog.compress_segments(offsets) } +} +impl Local { /// Get the size on disk of the underlying [`Commitlog`]. pub fn size_on_disk(&self) -> io::Result { self.clog.size_on_disk() } } -struct Actor { - clog: Arc>>, +struct Actor +where + R: Repo, +{ + clog: Arc, R>>, durable_offset: watch::Sender>, queue_depth: Arc, batch_capacity: NonZeroUsize, - #[allow(unused)] - lock: LockedFile, + _lock: Option, } -impl Actor { +impl Actor +where + T: Encode + Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ #[instrument(name = "durability::local::actor", skip_all)] async fn run(self, transactions_rx: async_channel::Receiver>>) { info!("starting durability actor"); @@ -287,7 +335,11 @@ impl Actor { } } -impl Durability for Local { +impl Durability for Local +where + T: Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ type TxData = Txdata; fn append_tx(&self, tx: PreparedTx) { @@ -332,7 +384,11 @@ impl Durability for Local { } } -impl History for Commitlog> { +impl History for Commitlog, R> +where + T: Encode + 'static, + R: Repo + Send + Sync + 'static, +{ type TxData = Txdata; fn fold_transactions_from(&self, offset: TxOffset, decoder: D) -> Result<(), D::Error> From 858e09aac6954acb041bdf6df8c8c4b0e5943270 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 30 Apr 2026 02:41:46 +0530 Subject: [PATCH 21/74] properties better --- crates/dst/src/core/mod.rs | 30 ++- crates/dst/src/targets/properties.rs | 149 +++++++++++++- .../src/targets/relational_db_commitlog.rs | 191 ++++++++++-------- .../src/workload/commitlog_ops/generation.rs | 4 +- 4 files changed, 280 insertions(+), 94 deletions(-) diff --git a/crates/dst/src/core/mod.rs 
b/crates/dst/src/core/mod.rs index b7ad15769c5..8f51f749e84 100644 --- a/crates/dst/src/core/mod.rs +++ b/crates/dst/src/core/mod.rs @@ -22,20 +22,36 @@ pub trait WorkloadPlan { /// Target execution contract over a workload interaction stream. pub trait TargetEngine { + type Observation; type Outcome; type Error; - async fn execute_interaction(&mut self, interaction: &I) -> Result<(), Self::Error>; + async fn execute_interaction(&mut self, interaction: &I) -> Result; fn finish(&mut self); fn collect_outcome(&mut self) -> anyhow::Result; } -/// Shared streaming runner. -pub async fn run_streaming(mut source: S, mut engine: E, cfg: RunConfig) -> anyhow::Result +/// Property runtime contract for the shared streaming runner. +pub trait StreamingProperties +where + E: TargetEngine, +{ + fn observe(&mut self, engine: &E, interaction: &I, observation: &O) -> Result<(), String>; + fn finish(&mut self, engine: &E, outcome: &E::Outcome) -> Result<(), String>; +} + +/// Shared streaming runner with property orchestration. 
+pub async fn run_streaming( + mut source: S, + mut engine: E, + mut properties: P, + cfg: RunConfig, +) -> anyhow::Result where I: Clone, S: NextInteractionSource, E: TargetEngine, + P: StreamingProperties, { let deadline = cfg.deadline(); let mut step = 0usize; @@ -46,13 +62,19 @@ where let Some(interaction) = source.next_interaction() else { break; }; - engine + let observation = engine .execute_interaction(&interaction) .await .map_err(|e| anyhow::anyhow!("interaction execution failed at step {step}: {e}"))?; + properties + .observe(&engine, &interaction, &observation) + .map_err(|e| anyhow::anyhow!("property violation at step {step}: {e}"))?; step = step.saturating_add(1); } engine.finish(); let outcome = engine.collect_outcome()?; + properties + .finish(&engine, &outcome) + .map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?; Ok(outcome) } diff --git a/crates/dst/src/targets/properties.rs b/crates/dst/src/targets/properties.rs index 1aa959640f0..773da80e104 100644 --- a/crates/dst/src/targets/properties.rs +++ b/crates/dst/src/targets/properties.rs @@ -8,10 +8,14 @@ use std::ops::Bound; use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; use crate::{ + core::StreamingProperties, schema::{SchemaPlan, SimRow}, - workload::table_ops::{ - ExpectedErrorKind, ExpectedModel, ExpectedResult, TableOperation, TableScenario, TableWorkloadInteraction, - TableWorkloadOutcome, + workload::{ + commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome}, + table_ops::{ + ExpectedErrorKind, ExpectedModel, ExpectedResult, TableOperation, TableScenario, TableWorkloadInteraction, + TableWorkloadOutcome, + }, }, }; @@ -57,6 +61,59 @@ pub(crate) struct DynamicMigrationProbe { pub inserted_row: SimRow, } +#[derive(Clone, Debug)] +pub(crate) enum TableObservation { + Applied, + RowInserted { + conn: usize, + table: usize, + row: SimRow, + in_tx: bool, + }, + RowDeleted { + conn: usize, + table: usize, + row: SimRow, + in_tx: bool, + }, + 
ExpectedError(ExpectedErrorKind), + PointLookup { + conn: usize, + table: usize, + id: u64, + actual: Option, + }, + PredicateCount { + conn: usize, + table: usize, + col: u16, + value: AlgebraicValue, + actual: usize, + }, + RangeScan { + conn: usize, + table: usize, + cols: Vec, + lower: Bound, + upper: Bound, + actual: Vec, + }, + FullScan { + conn: usize, + table: usize, + actual: Vec, + }, + CommitOrRollback, +} + +#[derive(Clone, Debug)] +pub(crate) enum CommitlogObservation { + Table(TableObservation), + Applied, + Skipped, + DynamicMigrationProbe(DynamicMigrationProbe), +} + #[derive(Clone, Debug)] pub(crate) struct PropertyModels { table: TableModel, @@ -490,6 +547,92 @@ impl PropertyRuntime { } Ok(()) } + + fn observe_table_observation( + &mut self, + access: &dyn TargetPropertyAccess, + interaction: &TableWorkloadInteraction, + observation: &TableObservation, + ) -> Result<(), String> { + match observation { + TableObservation::Applied => {} + TableObservation::RowInserted { + conn, + table, + row, + in_tx, + } => self.on_insert(access, 0, *conn, *table, row, *in_tx)?, + TableObservation::RowDeleted { + conn, + table, + row, + in_tx, + } => self.on_delete(access, 0, *conn, *table, row, *in_tx)?, + TableObservation::ExpectedError(kind) => self.on_expected_error(access, *kind, interaction)?, + TableObservation::PointLookup { + conn, + table, + id, + actual, + } => self.on_point_lookup(access, *conn, *table, *id, actual)?, + TableObservation::PredicateCount { + conn, + table, + col, + value, + actual, + } => self.on_predicate_count(access, *conn, *table, *col, value, *actual)?, + TableObservation::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + } => self.on_range_scan(access, *conn, *table, cols, lower, upper, actual)?, + TableObservation::FullScan { conn, table, actual } => self.on_full_scan(access, *conn, *table, actual)?, + TableObservation::CommitOrRollback => {} + } + + self.on_table_interaction(access, interaction)?; + + if 
matches!(observation, TableObservation::CommitOrRollback) { + self.on_commit_or_rollback(access)?; + } + Ok(()) + } +} + +impl StreamingProperties for PropertyRuntime +where + E: crate::core::TargetEngine< + CommitlogInteraction, + Observation = CommitlogObservation, + Outcome = CommitlogWorkloadOutcome, + Error = String, + > + TargetPropertyAccess, +{ + fn observe( + &mut self, + engine: &E, + interaction: &CommitlogInteraction, + observation: &CommitlogObservation, + ) -> Result<(), String> { + match (interaction, observation) { + (CommitlogInteraction::Table(table_interaction), CommitlogObservation::Table(table_observation)) => { + self.observe_table_observation(engine, table_interaction, table_observation) + } + (_, CommitlogObservation::DynamicMigrationProbe(probe)) => self.on_dynamic_migration_probe(engine, probe), + (_, CommitlogObservation::Applied | CommitlogObservation::Skipped) => Ok(()), + (other, observation) => Err(format!( + "observation {observation:?} does not match interaction {other:?}" + )), + } + } + + fn finish(&mut self, engine: &E, outcome: &CommitlogWorkloadOutcome) -> Result<(), String> { + self.on_table_workload_finish(engine, &outcome.table) + } } struct RuleEntry { diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 87f50666ccb..800679ec0b8 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -32,7 +32,9 @@ use crate::{ core::{self, TargetEngine}, schema::{SchemaPlan, SimRow}, seed::DstSeed, - targets::properties::{DynamicMigrationProbe, PropertyRuntime, TargetPropertyAccess}, + targets::properties::{ + CommitlogObservation, DynamicMigrationProbe, PropertyRuntime, TableObservation, TargetPropertyAccess, + }, workload::{ commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome}, table_ops::{ @@ -43,12 +45,34 @@ use crate::{ }; pub type RelationalDbCommitlogOutcome = CommitlogWorkloadOutcome; 
+type RelationalDbCommitlogSource = crate::workload::commitlog_ops::NextInteractionGeneratorComposite; +type RelationalDbCommitlogProperties = PropertyRuntime; pub async fn run_generated_with_config_and_scenario( seed: DstSeed, scenario: TableScenarioId, config: RunConfig, ) -> anyhow::Result { + let (source, engine, properties) = build(seed, scenario, &config)?; + let outcome = core::run_streaming(source, engine, properties, config).await?; + info!( + applied_steps = outcome.applied_steps, + durable_commit_count = outcome.durable_commit_count, + replay_table_count = outcome.replay_table_count, + "relational_db_commitlog complete" + ); + Ok(outcome) +} + +fn build( + seed: DstSeed, + scenario: TableScenarioId, + config: &RunConfig, +) -> anyhow::Result<( + RelationalDbCommitlogSource, + RelationalDbEngine, + RelationalDbCommitlogProperties, +)> { let mut connection_rng = seed.fork(121).rng(); let num_connections = connection_rng.index(3) + 1; let mut schema_rng = seed.fork(122).rng(); @@ -60,15 +84,9 @@ pub async fn run_generated_with_config_and_scenario( num_connections, config.max_interactions_or_default(usize::MAX), ); - let engine = RelationalDbEngine::new(seed, scenario, &schema, num_connections)?; - let outcome = core::run_streaming(generator, engine, config).await?; - info!( - applied_steps = outcome.applied_steps, - durable_commit_count = outcome.durable_commit_count, - replay_table_count = outcome.replay_table_count, - "relational_db_commitlog complete" - ); - Ok(outcome) + let engine = RelationalDbEngine::new(seed, &schema, num_connections)?; + let properties = PropertyRuntime::for_table_workload(scenario, schema.clone(), num_connections); + Ok((generator, engine, properties)) } #[derive(Clone, Debug)] @@ -89,7 +107,6 @@ struct RelationalDbEngine { last_observed_durable_offset: Option, last_durable_snapshot: DurableSnapshot, pending_snapshot_capture: bool, - properties: PropertyRuntime, durability: Arc, runtime_handle: tokio::runtime::Handle, 
commitlog_repo: MemoryCommitlogRepo, @@ -99,12 +116,7 @@ struct RelationalDbEngine { type DurableSnapshot = BTreeMap>; impl RelationalDbEngine { - fn new( - seed: DstSeed, - scenario: TableScenarioId, - schema: &SchemaPlan, - num_connections: usize, - ) -> anyhow::Result { + fn new(seed: DstSeed, schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { let (db, runtime_handle, commitlog_repo, durability, runtime_guard) = bootstrap_relational_db(seed.fork(700))?; let mut this = Self { db: Some(db), @@ -116,7 +128,6 @@ impl RelationalDbEngine { last_observed_durable_offset: None, last_durable_snapshot: BTreeMap::new(), pending_snapshot_capture: false, - properties: PropertyRuntime::for_table_workload(scenario, schema.clone(), num_connections), durability, runtime_handle, commitlog_repo, @@ -181,22 +192,25 @@ impl RelationalDbEngine { .map_err(|err| format!("install base schema commit failed: {err}")) } - async fn execute(&mut self, interaction: &CommitlogInteraction) -> Result<(), String> { + async fn execute(&mut self, interaction: &CommitlogInteraction) -> Result { self.step = self.step.saturating_add(1); match interaction { - CommitlogInteraction::Table(op) => self.execute_table_op(op), + CommitlogInteraction::Table(op) => self.execute_table_op(op).map(CommitlogObservation::Table), CommitlogInteraction::CreateDynamicTable { conn, slot } => self.create_dynamic_table(*conn, *slot), CommitlogInteraction::DropDynamicTable { conn, slot } => self.drop_dynamic_table(*conn, *slot), CommitlogInteraction::MigrateDynamicTable { conn, slot } => self.migrate_dynamic_table(*conn, *slot), - CommitlogInteraction::ChaosSync => self.sync_and_snapshot(true), + CommitlogInteraction::ChaosSync => { + self.sync_and_snapshot(true)?; + Ok(CommitlogObservation::Applied) + } CommitlogInteraction::CloseReopen => self.close_and_reopen().await, } } - async fn close_and_reopen(&mut self) -> Result<(), String> { + async fn close_and_reopen(&mut self) -> Result { if 
self.execution.active_writer.is_some() || self.execution.tx_by_connection.iter().any(|tx| tx.is_some()) { trace!("skip close/reopen while transaction is open"); - return Ok(()); + return Ok(CommitlogObservation::Skipped); } self.sync_and_snapshot(true)?; @@ -247,7 +261,7 @@ impl RelationalDbEngine { dynamic_tables = self.dynamic_tables.len(), "reopened relational db from durable history" ); - Ok(()) + Ok(CommitlogObservation::Applied) } fn rebuild_table_handles_after_reopen(&mut self) -> Result<(), String> { @@ -284,7 +298,7 @@ impl RelationalDbEngine { Ok(()) } - fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), String> { + fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result { match std::panic::catch_unwind(AssertUnwindSafe(|| self.execute_table_op_inner(interaction))) { Ok(result) => result, Err(payload) => Err(format!( @@ -294,9 +308,9 @@ impl RelationalDbEngine { } } - fn execute_table_op_inner(&mut self, interaction: &TableWorkloadInteraction) -> Result<(), String> { + fn execute_table_op_inner(&mut self, interaction: &TableWorkloadInteraction) -> Result { trace!(step = self.step, ?interaction, "table interaction"); - let applied: Result<(), String> = match &interaction.op { + match &interaction.op { TableOperation::BeginTx { conn } => { self.execution.ensure_known_connection(*conn)?; if self.execution.tx_by_connection[*conn].is_some() { @@ -312,7 +326,7 @@ impl RelationalDbEngine { .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests), ); self.execution.active_writer = Some(*conn); - Ok(()) + Ok(TableObservation::Applied) } TableOperation::CommitTx { conn } => { self.execution.ensure_writer_owner(*conn, "commit")?; @@ -324,8 +338,7 @@ impl RelationalDbEngine { .map_err(|err| format!("commit interaction failed: {err}"))?; self.execution.active_writer = None; self.capture_pending_snapshot_if_idle()?; - self.with_property_runtime(|runtime, access| runtime.on_commit_or_rollback(access))?; - 
Ok(()) + Ok(TableObservation::CommitOrRollback) } TableOperation::RollbackTx { conn } => { self.execution.ensure_writer_owner(*conn, "rollback")?; @@ -335,8 +348,7 @@ impl RelationalDbEngine { let _ = self.db()?.rollback_mut_tx(tx); self.execution.active_writer = None; self.capture_pending_snapshot_if_idle()?; - self.with_property_runtime(|runtime, access| runtime.on_commit_or_rollback(access))?; - Ok(()) + Ok(TableObservation::CommitOrRollback) } TableOperation::Insert { conn, table, row } => { let in_tx = self.execution.tx_by_connection[*conn].is_some(); @@ -355,11 +367,12 @@ impl RelationalDbEngine { if !in_tx { self.sync_and_snapshot(false)?; } - let step = self.step as u64; - self.with_property_runtime(|runtime, access| { - runtime.on_insert(access, step, *conn, *table, &inserted_row, in_tx) - })?; - Ok(()) + Ok(TableObservation::RowInserted { + conn: *conn, + table: *table, + row: inserted_row, + in_tx, + }) } TableOperation::Delete { conn, table, row } => { let in_tx = self.execution.tx_by_connection[*conn].is_some(); @@ -377,11 +390,12 @@ impl RelationalDbEngine { if !in_tx { self.sync_and_snapshot(false)?; } - let step = self.step as u64; - self.with_property_runtime(|runtime, access| { - runtime.on_delete(access, step, *conn, *table, row, in_tx) - })?; - Ok(()) + Ok(TableObservation::RowDeleted { + conn: *conn, + table: *table, + row: row.clone(), + in_tx, + }) } TableOperation::DuplicateInsert { conn, table, row } => { let outcome = self.with_mut_tx(*conn, |engine, tx| { @@ -400,9 +414,9 @@ impl RelationalDbEngine { } })?; match outcome { - Ok(()) => self.with_property_runtime(|runtime, access| { - runtime.on_expected_error(access, ExpectedErrorKind::UniqueConstraintViolation, interaction) - }), + Ok(()) => Ok(TableObservation::ExpectedError( + ExpectedErrorKind::UniqueConstraintViolation, + )), Err(err) => Err(format!("[ExpectedErrorMatches] {err}; interaction={interaction:?}")), } } @@ -415,9 +429,7 @@ impl RelationalDbEngine { 
Ok(engine.db()?.delete_by_rel(tx, table_id, [row.to_product_value()])) })?; if deleted == 0 { - self.with_property_runtime(|runtime, access| { - runtime.on_expected_error(access, ExpectedErrorKind::MissingRow, interaction) - }) + Ok(TableObservation::ExpectedError(ExpectedErrorKind::MissingRow)) } else { Err(format!( "[ExpectedErrorDoesNotMutate] missing delete removed {deleted} rows; interaction={interaction:?}" @@ -443,7 +455,7 @@ impl RelationalDbEngine { if !in_tx { self.sync_and_snapshot(false)?; } - Ok(()) + Ok(TableObservation::Applied) } TableOperation::BatchDelete { conn, table, rows } => { let in_tx = self.execution.tx_by_connection[*conn].is_some(); @@ -463,7 +475,7 @@ impl RelationalDbEngine { if !in_tx { self.sync_and_snapshot(false)?; } - Ok(()) + Ok(TableObservation::Applied) } TableOperation::Reinsert { conn, table, row } => { let in_tx = self.execution.tx_by_connection[*conn].is_some(); @@ -486,12 +498,15 @@ impl RelationalDbEngine { if !in_tx { self.sync_and_snapshot(false)?; } - Ok(()) + Ok(TableObservation::Applied) } TableOperation::PointLookup { conn, table, id } => { let actual = self.lookup_base_row(*conn, *table, *id)?; - self.with_property_runtime(|runtime, access| { - runtime.on_point_lookup(access, *conn, *table, *id, &actual) + Ok(TableObservation::PointLookup { + conn: *conn, + table: *table, + id: *id, + actual, }) } TableOperation::PredicateCount { @@ -501,8 +516,12 @@ impl RelationalDbEngine { value, } => { let actual = self.count_by_col_eq_in_connection(*conn, *table, *col, value)?; - self.with_property_runtime(|runtime, access| { - runtime.on_predicate_count(access, *conn, *table, *col, value, actual) + Ok(TableObservation::PredicateCount { + conn: *conn, + table: *table, + col: *col, + value: value.clone(), + actual, }) } TableOperation::RangeScan { @@ -513,17 +532,24 @@ impl RelationalDbEngine { upper, } => { let actual = self.range_scan_in_connection(*conn, *table, cols, lower.clone(), upper.clone())?; - 
self.with_property_runtime(|runtime, access| { - runtime.on_range_scan(access, *conn, *table, cols, lower, upper, &actual) + Ok(TableObservation::RangeScan { + conn: *conn, + table: *table, + cols: cols.clone(), + lower: lower.clone(), + upper: upper.clone(), + actual, }) } TableOperation::FullScan { conn, table } => { let actual = self.collect_rows_in_connection(*conn, *table)?; - self.with_property_runtime(|runtime, access| runtime.on_full_scan(access, *conn, *table, &actual)) + Ok(TableObservation::FullScan { + conn: *conn, + table: *table, + actual, + }) } - }; - applied?; - self.with_property_runtime(|runtime, access| runtime.on_table_interaction(access, interaction)) + } } fn with_mut_tx( @@ -560,14 +586,14 @@ impl RelationalDbEngine { Ok(value) } - fn create_dynamic_table(&mut self, conn: usize, slot: u32) -> Result<(), String> { + fn create_dynamic_table(&mut self, conn: usize, slot: u32) -> Result { if self.execution.active_writer.is_some() { trace!( step = self.step, slot, "skip create dynamic table while transaction is open" ); - return Ok(()); + return Ok(CommitlogObservation::Skipped); } let conn = self.normalize_conn(conn); debug!(step = self.step, conn, slot, "create dynamic table"); @@ -599,17 +625,18 @@ impl RelationalDbEngine { ); Ok(()) })?; - self.sync_and_snapshot(false) + self.sync_and_snapshot(false)?; + Ok(CommitlogObservation::Applied) } - fn drop_dynamic_table(&mut self, conn: usize, slot: u32) -> Result<(), String> { + fn drop_dynamic_table(&mut self, conn: usize, slot: u32) -> Result { if self.execution.active_writer.is_some() { trace!( step = self.step, slot, "skip drop dynamic table while transaction is open" ); - return Ok(()); + return Ok(CommitlogObservation::Skipped); } let conn = self.normalize_conn(conn); debug!(step = self.step, conn, slot, "drop dynamic table"); @@ -625,17 +652,18 @@ impl RelationalDbEngine { } Ok(()) })?; - self.sync_and_snapshot(false) + self.sync_and_snapshot(false)?; + Ok(CommitlogObservation::Applied) } - 
fn migrate_dynamic_table(&mut self, conn: usize, slot: u32) -> Result<(), String> { + fn migrate_dynamic_table(&mut self, conn: usize, slot: u32) -> Result { if self.execution.active_writer.is_some() { trace!( step = self.step, slot, "skip migrate dynamic table while transaction is open" ); - return Ok(()); + return Ok(CommitlogObservation::Skipped); } let conn = self.normalize_conn(conn); debug!(step = self.step, conn, slot, "migrate dynamic table"); @@ -683,10 +711,10 @@ impl RelationalDbEngine { inserted_row: inserted, })) })?; - if let Some(probe) = probe { - self.with_property_runtime(|runtime, access| runtime.on_dynamic_migration_probe(access, &probe))?; - } - self.sync_and_snapshot(false) + self.sync_and_snapshot(false)?; + Ok(probe + .map(CommitlogObservation::DynamicMigrationProbe) + .unwrap_or(CommitlogObservation::Skipped)) } fn normalize_conn(&self, conn: usize) -> usize { @@ -855,16 +883,6 @@ impl RelationalDbEngine { Ok(rows) } - fn with_property_runtime( - &mut self, - f: impl FnOnce(&mut PropertyRuntime, &Self) -> Result, - ) -> Result { - let mut runtime = std::mem::take(&mut self.properties); - let result = f(&mut runtime, self); - self.properties = runtime; - result - } - fn collect_rows_by_id(&self, table_id: TableId) -> Result, String> { let db = self.db()?; let tx = db.begin_tx(Workload::ForTests); @@ -901,7 +919,6 @@ impl RelationalDbEngine { self.capture_pending_snapshot_if_idle()?; self.sync_and_snapshot(true)?; let table = self.collect_table_outcome()?; - self.with_property_runtime(|runtime, access| runtime.on_table_workload_finish(access, &table))?; let durable_commit_count = self .last_observed_durable_offset .map(|offset| (offset as usize).saturating_add(1)) @@ -983,10 +1000,14 @@ impl TargetPropertyAccess for RelationalDbEngine { } impl TargetEngine for RelationalDbEngine { + type Observation = CommitlogObservation; type Outcome = RelationalDbCommitlogOutcome; type Error = String; - async fn execute_interaction(&mut self, interaction: 
&CommitlogInteraction) -> Result<(), Self::Error> { + async fn execute_interaction( + &mut self, + interaction: &CommitlogInteraction, + ) -> Result { self.execute(interaction).await } diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs index cdbbba9f552..098cc980d50 100644 --- a/crates/dst/src/workload/commitlog_ops/generation.rs +++ b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -54,11 +54,11 @@ impl NextInteractionGeneratorComposite { if Percent::new(18).sample(&mut self.rng) { self.pending.push_back(CommitlogInteraction::ChaosSync); } - if Percent::new(4).sample(&mut self.rng) { + if Percent::new(1).sample(&mut self.rng) { self.pending.push_back(CommitlogInteraction::CloseReopen); } - if Percent::new(9).sample(&mut self.rng) { + if Percent::new(1).sample(&mut self.rng) { let conn = ConnectionChoice { connection_count: self.num_connections, } From c6fa1372f5e17f9b02ce1ddb7a820918769a8805 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 30 Apr 2026 19:39:08 +0530 Subject: [PATCH 22/74] improvements --- crates/core/src/util/jobs.rs | 29 ++- crates/dst/src/targets/buggified_repo.rs | 237 ++++++++++++++++++ crates/dst/src/targets/descriptor.rs | 44 +++- crates/dst/src/targets/mod.rs | 1 + .../src/targets/relational_db_commitlog.rs | 210 +++++++++++++++- crates/dst/src/workload/commitlog_ops/mod.rs | 5 +- .../dst/src/workload/commitlog_ops/types.rs | 62 +++++ .../table_ops/scenarios/random_crud.rs | 12 +- 8 files changed, 580 insertions(+), 20 deletions(-) create mode 100644 crates/dst/src/targets/buggified_repo.rs diff --git a/crates/core/src/util/jobs.rs b/crates/core/src/util/jobs.rs index 6f4f6b8bf73..b09f2db4d88 100644 --- a/crates/core/src/util/jobs.rs +++ b/crates/core/src/util/jobs.rs @@ -7,8 +7,9 @@ use futures::FutureExt; use indexmap::IndexMap; use smallvec::SmallVec; use spacetimedb_data_structures::map::HashMap; -use tokio::runtime; use tokio::sync::{mpsc, oneshot, 
watch}; +#[cfg(not(madsim))] +use tokio::runtime; use tracing::Instrument; use crate::util::thread_scheduling::apply_compute_thread_hint; @@ -289,10 +290,14 @@ pub struct SingleCoreExecutor { struct SingleCoreExecutorInner { /// The sending end of a channel over which we send jobs. job_tx: mpsc::UnboundedSender LocalBoxFuture<'static, ()> + Send>>, + #[cfg(madsim)] + /// Retains the allocation guard for the lifetime of the simulated executor. + _guard: LoadBalanceOnDropGuard, } impl SingleCoreExecutor { /// Spawn a `SingleCoreExecutor` on the given core. + #[cfg(not(madsim))] fn spawn(core: AllocatedJobCore) -> Self { let AllocatedJobCore { guard, mut pinner } = core; @@ -327,6 +332,28 @@ impl SingleCoreExecutor { Self { inner } } + /// Spawn a simulated `SingleCoreExecutor`. + /// + /// In simulation, job execution models the same logical single-core queue + /// without creating an OS thread or re-entering a Tokio runtime with + /// `Handle::block_on`. + #[cfg(madsim)] + fn spawn(core: AllocatedJobCore) -> Self { + let AllocatedJobCore { guard, pinner: _ } = core; + + let (job_tx, mut job_rx) = mpsc::unbounded_channel(); + + let inner = Arc::new(SingleCoreExecutorInner { job_tx, _guard: guard }); + + tokio::task::spawn_local(async move { + while let Some(job) = job_rx.recv().await { + tokio::task::spawn_local(job()); + } + }); + + Self { inner } + } + /// Create a `SingleCoreExecutor` which runs jobs in [`tokio::runtime::Handle::current`]. 
/// /// Callers should most likely instead construct a `SingleCoreExecutor` via [`JobCores::take`], diff --git a/crates/dst/src/targets/buggified_repo.rs b/crates/dst/src/targets/buggified_repo.rs new file mode 100644 index 00000000000..0c277b677fa --- /dev/null +++ b/crates/dst/src/targets/buggified_repo.rs @@ -0,0 +1,237 @@ +use std::{ + fmt, + io::{self, BufRead, Read, Seek, Write}, + time::Duration, +}; + +use spacetimedb_commitlog::{ + repo::{Repo, RepoWithoutLockFile, SegmentLen, SegmentReader, TxOffset, TxOffsetIndex, TxOffsetIndexMut}, + segment::FileLike, +}; + +const LATENCY_PROBABILITY: f64 = 0.35; +const LONG_LATENCY_PROBABILITY: f64 = 0.08; +const SHORT_IO_PROBABILITY: f64 = 0.08; + +/// DST-only repo wrapper that makes the in-memory commitlog backend behave less like RAM. +/// +/// Faults stay within normal file API semantics: calls may take deterministic simulated time +/// and `Read` / `Write` may complete partially. The wrapper deliberately avoids corruption or +/// crash-style partial persistence; those need a stronger durability model before we enable them. 
+#[derive(Clone, Debug)] +pub(crate) struct BuggifiedRepo { + inner: R, +} + +impl BuggifiedRepo { + pub(crate) fn new(inner: R) -> Self { + Self { inner } + } +} + +impl fmt::Display for BuggifiedRepo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}+buggified", self.inner) + } +} + +impl Repo for BuggifiedRepo { + type SegmentWriter = BuggifiedSegment; + type SegmentReader = BuggifiedReader; + + fn create_segment(&self, offset: u64) -> io::Result { + maybe_disk_latency(); + self.inner.create_segment(offset).map(BuggifiedSegment::new) + } + + fn open_segment_reader(&self, offset: u64) -> io::Result { + maybe_disk_latency(); + self.inner.open_segment_reader(offset).map(BuggifiedReader::new) + } + + fn open_segment_writer(&self, offset: u64) -> io::Result { + maybe_disk_latency(); + self.inner.open_segment_writer(offset).map(BuggifiedSegment::new) + } + + fn segment_file_path(&self, offset: u64) -> Option { + self.inner.segment_file_path(offset) + } + + fn remove_segment(&self, offset: u64) -> io::Result<()> { + maybe_disk_latency(); + self.inner.remove_segment(offset) + } + + fn compress_segment(&self, offset: u64) -> io::Result<()> { + maybe_disk_latency(); + self.inner.compress_segment(offset) + } + + fn existing_offsets(&self) -> io::Result> { + maybe_disk_latency(); + self.inner.existing_offsets() + } + + fn create_offset_index(&self, offset: TxOffset, cap: u64) -> io::Result { + maybe_disk_latency(); + self.inner.create_offset_index(offset, cap) + } + + fn remove_offset_index(&self, offset: TxOffset) -> io::Result<()> { + maybe_disk_latency(); + self.inner.remove_offset_index(offset) + } + + fn get_offset_index(&self, offset: TxOffset) -> io::Result { + maybe_disk_latency(); + self.inner.get_offset_index(offset) + } +} + +impl RepoWithoutLockFile for BuggifiedRepo {} + +pub(crate) struct BuggifiedSegment { + inner: S, +} + +impl BuggifiedSegment { + fn new(inner: S) -> Self { + Self { inner } + } +} + +impl Read for 
BuggifiedSegment { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + maybe_disk_latency(); + let len = maybe_short_len(buf.len()); + self.inner.read(&mut buf[..len]) + } +} + +impl Write for BuggifiedSegment { + fn write(&mut self, buf: &[u8]) -> io::Result { + maybe_disk_latency(); + let len = maybe_short_len(buf.len()); + self.inner.write(&buf[..len]) + } + + fn flush(&mut self) -> io::Result<()> { + maybe_disk_latency(); + self.inner.flush() + } +} + +impl Seek for BuggifiedSegment { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + maybe_disk_latency(); + self.inner.seek(pos) + } +} + +impl SegmentLen for BuggifiedSegment { + fn segment_len(&mut self) -> io::Result { + maybe_disk_latency(); + self.inner.segment_len() + } +} + +impl FileLike for BuggifiedSegment { + fn fsync(&mut self) -> io::Result<()> { + maybe_disk_latency(); + self.inner.fsync() + } + + fn ftruncate(&mut self, tx_offset: u64, size: u64) -> io::Result<()> { + maybe_disk_latency(); + self.inner.ftruncate(tx_offset, size) + } +} + +pub(crate) struct BuggifiedReader { + inner: S, +} + +impl BuggifiedReader { + fn new(inner: S) -> Self { + Self { inner } + } +} + +impl Read for BuggifiedReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + maybe_disk_latency(); + let len = maybe_short_len(buf.len()); + self.inner.read(&mut buf[..len]) + } +} + +impl BufRead for BuggifiedReader { + fn fill_buf(&mut self) -> io::Result<&[u8]> { + maybe_disk_latency(); + self.inner.fill_buf() + } + + fn consume(&mut self, amount: usize) { + self.inner.consume(amount); + } +} + +impl Seek for BuggifiedReader { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + maybe_disk_latency(); + self.inner.seek(pos) + } +} + +impl SegmentLen for BuggifiedReader { + fn segment_len(&mut self) -> io::Result { + maybe_disk_latency(); + self.inner.segment_len() + } +} + +impl SegmentReader for BuggifiedReader { + fn sealed(&self) -> bool { + self.inner.sealed() + } +} + +fn maybe_disk_latency() { + 
#[cfg(madsim)] + { + if madsim::buggify::buggify_with_prob(LATENCY_PROBABILITY) { + let latency = if madsim::buggify::buggify_with_prob(LONG_LATENCY_PROBABILITY) { + Duration::from_millis(25) + } else { + Duration::from_millis(1) + }; + madsim::time::advance(latency); + } + } + + #[cfg(not(madsim))] + { + let _ = (LATENCY_PROBABILITY, LONG_LATENCY_PROBABILITY, Duration::ZERO); + } +} + +fn maybe_short_len(len: usize) -> usize { + if len <= 1 { + return len; + } + + #[cfg(madsim)] + { + if madsim::buggify::buggify_with_prob(SHORT_IO_PROBABILITY) { + return (len / 2).max(1); + } + } + + #[cfg(not(madsim))] + { + let _ = SHORT_IO_PROBABILITY; + } + + len +} diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index e9514a70b4f..2e179a7026f 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -33,13 +33,53 @@ impl TargetDescriptor for RelationalDbCommitlogDescriptor { let outcome = crate::targets::relational_db_commitlog::run_generated_with_config_and_scenario(seed, scenario, config) .await?; + let alive_tasks = outcome + .runtime + .runtime_alive_tasks + .map(|count| count.to_string()) + .unwrap_or_else(|| "unknown".to_string()); Ok(format!( - "ok target={} seed={} steps={} durable_commits={} replay_tables={}", + "ok target={} seed={} steps={} schema_tables={} schema_columns={} schema_max_columns={} schema_indexes={} schema_extra_indexes={} durable_commits={} replay_tables={} table_ops={} creates={} drops={} migrates={} syncs={} reopens={} reopen_skipped={} skipped={} op_begin={} op_commit={} op_rollback={} op_insert={} op_delete={} op_dup_insert={} op_missing_delete={} op_batch_insert={} op_batch_delete={} op_reinsert={} op_point_lookup={} op_predicate_count={} op_range_scan={} op_full_scan={} tx_begin={} tx_commit={} tx_rollback={} auto_commit={} read_tx={} known_tasks={} durability_actors={} alive_tasks={}", Self::NAME, seed.0, outcome.applied_steps, + outcome.schema.initial_tables, + 
outcome.schema.initial_columns, + outcome.schema.max_columns_per_table, + outcome.schema.initial_indexes, + outcome.schema.extra_indexes, outcome.durable_commit_count, - outcome.replay_table_count + outcome.replay_table_count, + outcome.interactions.table, + outcome.interactions.create_dynamic_table, + outcome.interactions.drop_dynamic_table, + outcome.interactions.migrate_dynamic_table, + outcome.interactions.chaos_sync, + outcome.interactions.close_reopen_applied, + outcome.interactions.close_reopen_skipped, + outcome.interactions.skipped, + outcome.table_ops.begin_tx, + outcome.table_ops.commit_tx, + outcome.table_ops.rollback_tx, + outcome.table_ops.insert, + outcome.table_ops.delete, + outcome.table_ops.duplicate_insert, + outcome.table_ops.delete_missing, + outcome.table_ops.batch_insert, + outcome.table_ops.batch_delete, + outcome.table_ops.reinsert, + outcome.table_ops.point_lookup, + outcome.table_ops.predicate_count, + outcome.table_ops.range_scan, + outcome.table_ops.full_scan, + outcome.transactions.explicit_begin, + outcome.transactions.explicit_commit, + outcome.transactions.explicit_rollback, + outcome.transactions.auto_commit, + outcome.transactions.read_tx, + outcome.runtime.known_tokio_tasks_scheduled, + outcome.runtime.durability_actors_started, + alive_tasks )) }) } diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index b28169c826e..e61f19fc3eb 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -1,5 +1,6 @@ //! Concrete simulation targets. +pub(crate) mod buggified_repo; pub mod descriptor; pub(crate) mod properties; pub mod relational_db_commitlog; diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 800679ec0b8..1c3cdbc5b84 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -1,6 +1,6 @@ //! 
RelationalDB DST target with mocked commitlog file chaos and replay checks. -use std::{collections::BTreeMap, io, ops::Bound, panic::AssertUnwindSafe, sync::Arc}; +use std::{cell::Cell, collections::BTreeMap, io, num::NonZeroU64, ops::Bound, panic::AssertUnwindSafe, sync::Arc}; use spacetimedb_commitlog::repo::{Memory as MemoryCommitlogRepo, SizeOnDisk}; use spacetimedb_core::{ @@ -32,11 +32,13 @@ use crate::{ core::{self, TargetEngine}, schema::{SchemaPlan, SimRow}, seed::DstSeed, + targets::buggified_repo::BuggifiedRepo, targets::properties::{ CommitlogObservation, DynamicMigrationProbe, PropertyRuntime, TableObservation, TargetPropertyAccess, }, workload::{ commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome}, + commitlog_ops::{InteractionSummary, RuntimeSummary, SchemaSummary, TableOperationSummary, TransactionSummary}, table_ops::{ ConnectionWriteState, ExpectedErrorKind, TableOperation, TableScenario, TableScenarioId, TableWorkloadInteraction, TableWorkloadOutcome, @@ -96,6 +98,98 @@ struct DynamicTableState { table_id: TableId, } +#[derive(Default)] +struct RunStats { + interactions: InteractionSummary, + table_ops: TableOperationSummary, + transactions: TransactionStats, + runtime: RuntimeStats, +} + +#[derive(Default)] +struct TransactionStats { + explicit_begin: usize, + explicit_commit: usize, + explicit_rollback: usize, + auto_commit: usize, + read_tx: Cell, +} + +#[derive(Default)] +struct RuntimeStats { + durability_actors_started: usize, +} + +impl RunStats { + fn record_interaction_requested(&mut self, interaction: &CommitlogInteraction) { + match interaction { + CommitlogInteraction::Table(_) => self.interactions.table += 1, + CommitlogInteraction::CreateDynamicTable { .. } => self.interactions.create_dynamic_table += 1, + CommitlogInteraction::DropDynamicTable { .. } => self.interactions.drop_dynamic_table += 1, + CommitlogInteraction::MigrateDynamicTable { .. 
} => self.interactions.migrate_dynamic_table += 1, + CommitlogInteraction::ChaosSync => self.interactions.chaos_sync += 1, + CommitlogInteraction::CloseReopen => self.interactions.close_reopen_requested += 1, + } + } + + fn record_interaction_result(&mut self, interaction: &CommitlogInteraction, observation: &CommitlogObservation) { + if matches!(observation, CommitlogObservation::Skipped) { + self.interactions.skipped += 1; + } + if matches!(interaction, CommitlogInteraction::CloseReopen) { + match observation { + CommitlogObservation::Skipped => self.interactions.close_reopen_skipped += 1, + CommitlogObservation::Applied => self.interactions.close_reopen_applied += 1, + _ => {} + } + } + } + + fn record_table_operation(&mut self, op: &TableOperation) { + match op { + TableOperation::BeginTx { .. } => self.table_ops.begin_tx += 1, + TableOperation::CommitTx { .. } => self.table_ops.commit_tx += 1, + TableOperation::RollbackTx { .. } => self.table_ops.rollback_tx += 1, + TableOperation::Insert { .. } => self.table_ops.insert += 1, + TableOperation::Delete { .. } => self.table_ops.delete += 1, + TableOperation::DuplicateInsert { .. } => self.table_ops.duplicate_insert += 1, + TableOperation::DeleteMissing { .. } => self.table_ops.delete_missing += 1, + TableOperation::BatchInsert { .. } => self.table_ops.batch_insert += 1, + TableOperation::BatchDelete { .. } => self.table_ops.batch_delete += 1, + TableOperation::Reinsert { .. } => self.table_ops.reinsert += 1, + TableOperation::PointLookup { .. } => self.table_ops.point_lookup += 1, + TableOperation::PredicateCount { .. } => self.table_ops.predicate_count += 1, + TableOperation::RangeScan { .. } => self.table_ops.range_scan += 1, + TableOperation::FullScan { .. 
} => self.table_ops.full_scan += 1, + } + } + + fn record_read_tx(&self) { + self.transactions + .read_tx + .set(self.transactions.read_tx.get().saturating_add(1)); + } + + fn transaction_summary(&self, durable_commit_count: usize) -> TransactionSummary { + TransactionSummary { + explicit_begin: self.transactions.explicit_begin, + explicit_commit: self.transactions.explicit_commit, + explicit_rollback: self.transactions.explicit_rollback, + auto_commit: self.transactions.auto_commit, + read_tx: self.transactions.read_tx.get(), + durable_commit_count, + } + } + + fn runtime_summary(&self) -> RuntimeSummary { + RuntimeSummary { + known_tokio_tasks_scheduled: self.runtime.durability_actors_started, + durability_actors_started: self.runtime.durability_actors_started, + runtime_alive_tasks: runtime_alive_tasks(), + } + } +} + /// Engine executing mixed table+lifecycle interactions while recording mocked durable history. struct RelationalDbEngine { db: Option, @@ -108,8 +202,10 @@ struct RelationalDbEngine { last_durable_snapshot: DurableSnapshot, pending_snapshot_capture: bool, durability: Arc, + durability_opts: spacetimedb_durability::local::Options, runtime_handle: tokio::runtime::Handle, - commitlog_repo: MemoryCommitlogRepo, + commitlog_repo: StressCommitlogRepo, + stats: RunStats, _runtime_guard: Option, } @@ -117,7 +213,8 @@ type DurableSnapshot = BTreeMap>; impl RelationalDbEngine { fn new(seed: DstSeed, schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { - let (db, runtime_handle, commitlog_repo, durability, runtime_guard) = bootstrap_relational_db(seed.fork(700))?; + let (db, runtime_handle, commitlog_repo, durability, durability_opts, runtime_guard) = + bootstrap_relational_db(seed.fork(700))?; let mut this = Self { db: Some(db), execution: ConnectionWriteState::new(num_connections), @@ -129,8 +226,15 @@ impl RelationalDbEngine { last_durable_snapshot: BTreeMap::new(), pending_snapshot_capture: false, durability, + durability_opts, 
runtime_handle, commitlog_repo, + stats: RunStats { + runtime: RuntimeStats { + durability_actors_started: 1, + }, + ..Default::default() + }, _runtime_guard: runtime_guard, }; this.install_base_schema().map_err(anyhow::Error::msg)?; @@ -194,7 +298,8 @@ impl RelationalDbEngine { async fn execute(&mut self, interaction: &CommitlogInteraction) -> Result { self.step = self.step.saturating_add(1); - match interaction { + self.stats.record_interaction_requested(interaction); + let observation = match interaction { CommitlogInteraction::Table(op) => self.execute_table_op(op).map(CommitlogObservation::Table), CommitlogInteraction::CreateDynamicTable { conn, slot } => self.create_dynamic_table(*conn, *slot), CommitlogInteraction::DropDynamicTable { conn, slot } => self.drop_dynamic_table(*conn, *slot), @@ -204,7 +309,9 @@ impl RelationalDbEngine { Ok(CommitlogObservation::Applied) } CommitlogInteraction::CloseReopen => self.close_and_reopen().await, - } + }?; + self.stats.record_interaction_result(interaction, &observation); + Ok(observation) } async fn close_and_reopen(&mut self) -> Result { @@ -228,7 +335,7 @@ impl RelationalDbEngine { InMemoryCommitlogDurability::open_with_repo( self.commitlog_repo.clone(), self.runtime_handle.clone(), - Default::default(), + self.durability_opts, ) .map_err(|err| format!("reopen in-memory durability failed: {err}"))?, ); @@ -252,6 +359,7 @@ impl RelationalDbEngine { "unexpected connected clients after reopen: {connected_clients:?}" )); } + self.stats.runtime.durability_actors_started += 1; self.durability = durability; self.db = Some(db); self.rebuild_table_handles_after_reopen()?; @@ -267,6 +375,7 @@ impl RelationalDbEngine { fn rebuild_table_handles_after_reopen(&mut self) -> Result<(), String> { let db = self.db()?; let tx = db.begin_tx(Workload::ForTests); + self.stats.record_read_tx(); let schemas = db .get_all_tables(&tx) .map_err(|err| format!("list tables after reopen failed: {err}"))?; @@ -300,7 +409,11 @@ impl 
RelationalDbEngine { fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result { match std::panic::catch_unwind(AssertUnwindSafe(|| self.execute_table_op_inner(interaction))) { - Ok(result) => result, + Ok(Ok(observation)) => { + self.stats.record_table_operation(&interaction.op); + Ok(observation) + } + Ok(Err(err)) => Err(err), Err(payload) => Err(format!( "[DatastoreNeverPanics] interaction panicked: interaction={interaction:?}, payload={}", panic_payload_to_string(&payload) @@ -326,6 +439,7 @@ impl RelationalDbEngine { .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests), ); self.execution.active_writer = Some(*conn); + self.stats.transactions.explicit_begin += 1; Ok(TableObservation::Applied) } TableOperation::CommitTx { conn } => { @@ -337,6 +451,7 @@ impl RelationalDbEngine { .commit_tx(tx) .map_err(|err| format!("commit interaction failed: {err}"))?; self.execution.active_writer = None; + self.stats.transactions.explicit_commit += 1; self.capture_pending_snapshot_if_idle()?; Ok(TableObservation::CommitOrRollback) } @@ -347,6 +462,7 @@ impl RelationalDbEngine { .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; let _ = self.db()?.rollback_mut_tx(tx); self.execution.active_writer = None; + self.stats.transactions.explicit_rollback += 1; self.capture_pending_snapshot_if_idle()?; Ok(TableObservation::CommitOrRollback) } @@ -582,6 +698,7 @@ impl RelationalDbEngine { .commit_tx(tx) .map_err(|err| format!("auto-commit write failed: {err}"))?; self.execution.active_writer = None; + self.stats.transactions.auto_commit += 1; self.capture_pending_snapshot_if_idle()?; Ok(value) } @@ -758,6 +875,7 @@ impl RelationalDbEngine { } else { let db = self.db()?; let tx = db.begin_tx(Workload::ForTests); + self.stats.record_read_tx(); let found = self .db()? 
.iter_by_col_eq(&tx, table_id, 0u16, &AlgebraicValue::U64(id)) @@ -823,6 +941,7 @@ impl RelationalDbEngine { } else { let db = self.db()?; let tx = db.begin_tx(Workload::ForTests); + self.stats.record_read_tx(); let rows = self .db()? .iter_by_col_range(&tx, table_id, col_list, (lower, upper)) @@ -840,6 +959,7 @@ impl RelationalDbEngine { let table_id = self.table_id_for_index(table)?; let db = self.db()?; let tx = db.begin_tx(Workload::ForTests); + self.stats.record_read_tx(); let total = self .db()? .iter(&tx, table_id) @@ -853,6 +973,7 @@ impl RelationalDbEngine { let table_id = self.table_id_for_index(table)?; let db = self.db()?; let tx = db.begin_tx(Workload::ForTests); + self.stats.record_read_tx(); let total = self .db()? .iter_by_col_eq(&tx, table_id, col, value) @@ -872,6 +993,7 @@ impl RelationalDbEngine { let table_id = self.table_id_for_index(table)?; let db = self.db()?; let tx = db.begin_tx(Workload::ForTests); + self.stats.record_read_tx(); let cols = cols.iter().copied().collect::(); let rows = self .db()? @@ -886,6 +1008,7 @@ impl RelationalDbEngine { fn collect_rows_by_id(&self, table_id: TableId) -> Result, String> { let db = self.db()?; let tx = db.begin_tx(Workload::ForTests); + self.stats.record_read_tx(); let mut rows = self .db()? 
.iter(&tx, table_id) @@ -928,6 +1051,11 @@ impl RelationalDbEngine { applied_steps: self.step, durable_commit_count, replay_table_count: self.last_durable_snapshot.len(), + schema: schema_summary(&self.base_schema), + interactions: self.stats.interactions.clone(), + table_ops: self.stats.table_ops.clone(), + transactions: self.stats.transaction_summary(durable_commit_count), + runtime: self.stats.runtime_summary(), table, }) } @@ -1020,15 +1148,17 @@ impl TargetEngine for RelationalDbEngine { } } -type InMemoryCommitlogDurability = Local; +type StressCommitlogRepo = BuggifiedRepo; +type InMemoryCommitlogDurability = Local; fn bootstrap_relational_db( - _seed: DstSeed, + seed: DstSeed, ) -> anyhow::Result<( RelationalDB, tokio::runtime::Handle, - MemoryCommitlogRepo, + StressCommitlogRepo, Arc, + spacetimedb_durability::local::Options, Option, )> { let (runtime_handle, runtime_guard) = if let Ok(handle) = tokio::runtime::Handle::try_current() { @@ -1037,9 +1167,12 @@ fn bootstrap_relational_db( let runtime = tokio::runtime::Runtime::new()?; (runtime.handle().clone(), Some(runtime)) }; - let commitlog_repo = MemoryCommitlogRepo::unlimited(); + enable_madsim_buggify(); + + let commitlog_repo = BuggifiedRepo::new(MemoryCommitlogRepo::new(8 * 1024 * 1024)); + let durability_opts = commitlog_stress_options(seed.fork(701)); let durability = Arc::new( - InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), runtime_handle.clone(), Default::default()) + InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), runtime_handle.clone(), durability_opts) .map_err(|err| anyhow::anyhow!("open in-memory durability failed: {err}"))?, ); let persistence = Persistence { @@ -1060,7 +1193,58 @@ fn bootstrap_relational_db( db.with_auto_commit(Workload::Internal, |tx| { db.set_initialized(tx, Program::empty(HostType::Wasm.into())) })?; - Ok((db, runtime_handle, commitlog_repo, durability, runtime_guard)) + Ok(( + db, + runtime_handle, + commitlog_repo, + 
durability, + durability_opts, + runtime_guard, + )) +} + +fn commitlog_stress_options(seed: DstSeed) -> spacetimedb_durability::local::Options { + let mut opts = spacetimedb_durability::local::Options::default(); + opts.commitlog.max_segment_size = 2 * 1024; + opts.commitlog.offset_index_interval_bytes = NonZeroU64::new(256).expect("256 > 0"); + opts.commitlog.offset_index_require_segment_fsync = seed.0 % 2 == 0; + opts.commitlog.write_buffer_size = 512; + opts +} + +fn enable_madsim_buggify() { + #[cfg(madsim)] + madsim::buggify::enable(); +} + +fn runtime_alive_tasks() -> Option { + // The madsim runtime exposes live task metrics on `Runtime`, but the target + // only receives Tokio-compatible handles. Keep this explicit instead of + // reporting madsim-tokio's dummy zero-valued metrics as real data. + None +} + +fn schema_summary(schema: &SchemaPlan) -> SchemaSummary { + let initial_tables = schema.tables.len(); + let initial_columns = schema.tables.iter().map(|table| table.columns.len()).sum(); + let max_columns_per_table = schema + .tables + .iter() + .map(|table| table.columns.len()) + .max() + .unwrap_or_default(); + let extra_indexes = schema + .tables + .iter() + .map(|table| table.extra_indexes.len()) + .sum::(); + SchemaSummary { + initial_tables, + initial_columns, + max_columns_per_table, + initial_indexes: initial_tables + extra_indexes, + extra_indexes, + } } fn in_memory_size_on_disk() -> io::Result { diff --git a/crates/dst/src/workload/commitlog_ops/mod.rs b/crates/dst/src/workload/commitlog_ops/mod.rs index 8c8191f7489..2ce68e4ae81 100644 --- a/crates/dst/src/workload/commitlog_ops/mod.rs +++ b/crates/dst/src/workload/commitlog_ops/mod.rs @@ -4,4 +4,7 @@ mod generation; mod types; pub(crate) use generation::NextInteractionGeneratorComposite; -pub use types::{CommitlogInteraction, CommitlogWorkloadOutcome}; +pub use types::{ + CommitlogInteraction, CommitlogWorkloadOutcome, InteractionSummary, RuntimeSummary, SchemaSummary, + 
TableOperationSummary, TransactionSummary, +}; diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs index 5858ee7feca..2b067273720 100644 --- a/crates/dst/src/workload/commitlog_ops/types.rs +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -25,5 +25,67 @@ pub struct CommitlogWorkloadOutcome { pub applied_steps: usize, pub durable_commit_count: usize, pub replay_table_count: usize, + pub schema: SchemaSummary, + pub interactions: InteractionSummary, + pub table_ops: TableOperationSummary, + pub transactions: TransactionSummary, + pub runtime: RuntimeSummary, pub table: TableWorkloadOutcome, } + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct SchemaSummary { + pub initial_tables: usize, + pub initial_columns: usize, + pub max_columns_per_table: usize, + pub initial_indexes: usize, + pub extra_indexes: usize, +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct InteractionSummary { + pub table: usize, + pub create_dynamic_table: usize, + pub drop_dynamic_table: usize, + pub migrate_dynamic_table: usize, + pub chaos_sync: usize, + pub close_reopen_requested: usize, + pub close_reopen_applied: usize, + pub close_reopen_skipped: usize, + pub skipped: usize, +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct TableOperationSummary { + pub begin_tx: usize, + pub commit_tx: usize, + pub rollback_tx: usize, + pub insert: usize, + pub delete: usize, + pub duplicate_insert: usize, + pub delete_missing: usize, + pub batch_insert: usize, + pub batch_delete: usize, + pub reinsert: usize, + pub point_lookup: usize, + pub predicate_count: usize, + pub range_scan: usize, + pub full_scan: usize, +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct TransactionSummary { + pub explicit_begin: usize, + pub explicit_commit: usize, + pub explicit_rollback: usize, + pub auto_commit: usize, + pub read_tx: usize, + pub durable_commit_count: usize, +} + +#[derive(Clone, Debug, 
Default, Eq, PartialEq)] +pub struct RuntimeSummary { + pub known_tokio_tasks_scheduled: usize, + pub durability_actors_started: usize, + pub runtime_alive_tasks: Option, +} diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs index a1471d41703..afbd20e2e11 100644 --- a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -28,7 +28,7 @@ struct ScenarioTuning { } const RANDOM_CRUD_TUNING: ScenarioTuning = ScenarioTuning { - min_tables: 1, + min_tables: 2, table_count_choices: 3, min_extra_cols: 1, extra_col_choices: 4, @@ -45,7 +45,7 @@ const RANDOM_CRUD_TUNING: ScenarioTuning = ScenarioTuning { }; const INDEXED_RANGES_TUNING: ScenarioTuning = ScenarioTuning { - min_tables: 1, + min_tables: 2, table_count_choices: 2, min_extra_cols: 3, extra_col_choices: 3, @@ -70,7 +70,7 @@ pub fn generate_indexed_ranges_schema(rng: &mut DstRng) -> SchemaPlan { } fn generate_schema_with_tuning(rng: &mut DstRng, tuning: ScenarioTuning) -> SchemaPlan { - let table_count = tuning.min_tables + rng.index(tuning.table_count_choices); + let table_count = tuning.min_tables + mixed_index(rng, tuning.table_count_choices); let mut tables = Vec::with_capacity(table_count); for table_idx in 0..table_count { @@ -125,6 +125,12 @@ fn generate_schema_with_tuning(rng: &mut DstRng, tuning: ScenarioTuning) -> Sche SchemaPlan { tables } } +fn mixed_index(rng: &mut DstRng, len: usize) -> usize { + assert!(len > 0, "len must be non-zero"); + let value = rng.next_u64(); + ((value ^ (value >> 32)) as usize) % len +} + pub fn validate_outcome(_schema: &SchemaPlan, _outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { Ok(()) } From 0e850dc519b3a0a71b6653765670fadee6850d70 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Sun, 3 May 2026 15:40:26 +0530 Subject: [PATCH 23/74] readability --- Cargo.lock | 10 +- Cargo.toml | 2 + crates/core/Cargo.toml | 1 
+ crates/core/src/auth/token_validation.rs | 2 +- crates/core/src/database_logger.rs | 11 +- crates/core/src/host/disk_storage.rs | 4 +- crates/core/src/host/instance_env.rs | 2 +- crates/core/src/util/jobs.rs | 2 +- crates/dst/Cargo.toml | 4 - crates/dst/README.md | 212 ++++++-- crates/dst/src/config.rs | 60 ++- crates/dst/src/core/mod.rs | 26 +- crates/dst/src/lib.rs | 23 + crates/dst/src/main.rs | 66 ++- crates/dst/src/schema.rs | 75 +++ crates/dst/src/targets/buggified_repo.rs | 402 ++++++++++++-- crates/dst/src/targets/descriptor.rs | 136 +++-- crates/dst/src/targets/properties.rs | 69 ++- .../src/targets/relational_db_commitlog.rs | 506 ++++++++++++++---- crates/dst/src/targets/standalone_host.rs | 119 ++-- .../src/workload/commitlog_ops/generation.rs | 83 ++- crates/dst/src/workload/commitlog_ops/mod.rs | 6 +- .../dst/src/workload/commitlog_ops/types.rs | 60 ++- .../dst/src/workload/module_ops/generation.rs | 8 +- crates/dst/src/workload/module_ops/mod.rs | 2 +- crates/dst/src/workload/strategy.rs | 11 +- .../dst/src/workload/table_ops/generation.rs | 91 +++- crates/dst/src/workload/table_ops/mod.rs | 2 +- crates/dst/src/workload/table_ops/model.rs | 132 ++++- .../table_ops/scenarios/random_crud.rs | 271 +++++++++- crates/dst/src/workload/table_ops/types.rs | 133 +++-- crates/dst/tests/madsim_axum_reqwest.rs | 36 -- crates/dst/tests/madsim_tcp.rs | 39 ++ crates/io/Cargo.toml | 13 + crates/io/LICENSE | 1 + crates/io/src/lib.rs | 73 +++ tools/ci/README.md | 11 + tools/ci/src/main.rs | 79 +++ 38 files changed, 2281 insertions(+), 502 deletions(-) delete mode 100644 crates/dst/tests/madsim_axum_reqwest.rs create mode 100644 crates/dst/tests/madsim_tcp.rs create mode 100644 crates/io/Cargo.toml create mode 120000 crates/io/LICENSE create mode 100644 crates/io/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 4e88c4d1e8a..d3ffccc7d7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8193,6 +8193,7 @@ dependencies = [ "spacetimedb-execution", "spacetimedb-expr", 
"spacetimedb-fs-utils", + "spacetimedb-io", "spacetimedb-jsonwebtoken", "spacetimedb-jwks", "spacetimedb-lib 2.1.0", @@ -8290,12 +8291,10 @@ name = "spacetimedb-dst" version = "2.1.0" dependencies = [ "anyhow", - "axum", "bytes", "clap 4.5.50", "madsim", "madsim-tokio", - "reqwest 0.12.24", "spacetimedb-cli", "spacetimedb-client-api", "spacetimedb-client-api-messages", @@ -8395,6 +8394,13 @@ dependencies = [ "tempfile", ] +[[package]] +name = "spacetimedb-io" +version = "2.1.0" +dependencies = [ + "madsim-tokio", +] + [[package]] name = "spacetimedb-jsonwebtoken" version = "9.3.0" diff --git a/Cargo.toml b/Cargo.toml index 72048d8e906..81db5f1e92d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ members = [ "crates/physical-plan", "crates/primitives", "crates/query", + "crates/io", "crates/sats", "crates/schema", "crates/smoketests", @@ -137,6 +138,7 @@ spacetimedb-pg = { path = "crates/pg", version = "=2.1.0" } spacetimedb-physical-plan = { path = "crates/physical-plan", version = "=2.1.0" } spacetimedb-primitives = { path = "crates/primitives", version = "=2.1.0" } spacetimedb-query = { path = "crates/query", version = "=2.1.0" } +spacetimedb-io = { path = "crates/io", version = "=2.1.0" } spacetimedb-sats = { path = "crates/sats", version = "=2.1.0" } spacetimedb-schema = { path = "crates/schema", version = "=2.1.0" } spacetimedb-standalone = { path = "crates/standalone", version = "=2.1.0" } diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 1578ee4bf59..443c355e79e 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -28,6 +28,7 @@ spacetimedb-primitives.workspace = true spacetimedb-paths.workspace = true spacetimedb-physical-plan.workspace = true spacetimedb-query.workspace = true +spacetimedb-io.workspace = true spacetimedb-sats = { workspace = true, features = ["serde"] } spacetimedb-schema.workspace = true spacetimedb-table.workspace = true diff --git a/crates/core/src/auth/token_validation.rs 
b/crates/core/src/auth/token_validation.rs index c38d732882d..c644de5af61 100644 --- a/crates/core/src/auth/token_validation.rs +++ b/crates/core/src/auth/token_validation.rs @@ -481,7 +481,7 @@ mod tests { use axum::routing::get; use axum::Json; use axum::Router; - use tokio::net::TcpListener; + use spacetimedb_io::net::TcpListener; use tokio::sync::oneshot; use serde::{Deserialize, Serialize}; diff --git a/crates/core/src/database_logger.rs b/crates/core/src/database_logger.rs index 0e202229dea..4804cea8093 100644 --- a/crates/core/src/database_logger.rs +++ b/crates/core/src/database_logger.rs @@ -3,6 +3,8 @@ use chrono::{NaiveDate, Utc}; use futures::stream::{self, BoxStream}; use futures::{Stream, StreamExt as _, TryStreamExt}; use pin_project_lite::pin_project; +use spacetimedb_io::fs::FileFromStd; +use spacetimedb_io::io::{AsyncRead, BufReader, ReadBuf}; use std::collections::VecDeque; use std::fs::File; use std::future; @@ -11,7 +13,6 @@ use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use tokio::io::{AsyncRead, BufReader}; use tokio::sync::{broadcast, mpsc, oneshot}; use tokio_stream::wrappers::errors::BroadcastStreamRecvError; use tokio_stream::wrappers::BroadcastStream; @@ -107,7 +108,7 @@ impl Logger for FileLogger { seek_to(&mut file, &mut buf, n)?; } - Ok::<_, io::Error>(tokio::fs::File::from_std(file)) + Ok::<_, io::Error>(spacetimedb_io::fs::file_from_std(file)) } })) .map_ok(ReaderStream::new) @@ -626,14 +627,14 @@ fn into_file_stream(file: impl Into>) -> impl Stream) -> Self { - match file.map(tokio::fs::File::from_std) { + match file.map(spacetimedb_io::fs::file_from_std) { Some(inner) => Self::File { inner }, None => Self::Empty, } @@ -641,7 +642,7 @@ impl MaybeFile { } impl AsyncRead for MaybeFile { - fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut tokio::io::ReadBuf<'_>) -> Poll> { + fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll> { match 
self.project() { MaybeFileProj::File { inner } => inner.poll_read(cx, buf), MaybeFileProj::Empty => Poll::Ready(Ok(())), diff --git a/crates/core/src/host/disk_storage.rs b/crates/core/src/host/disk_storage.rs index 3c55472aa16..7662f50db38 100644 --- a/crates/core/src/host/disk_storage.rs +++ b/crates/core/src/host/disk_storage.rs @@ -1,9 +1,9 @@ use async_trait::async_trait; +use spacetimedb_io::fs; +use spacetimedb_io::io::AsyncWriteExt; use spacetimedb_lib::{hash_bytes, Hash}; use std::io; use std::path::PathBuf; -use tokio::fs; -use tokio::io::AsyncWriteExt; use super::ExternalStorage; diff --git a/crates/core/src/host/instance_env.rs b/crates/core/src/host/instance_env.rs index 0d3d41632b1..4c84a775a5d 100644 --- a/crates/core/src/host/instance_env.rs +++ b/crates/core/src/host/instance_env.rs @@ -1019,7 +1019,7 @@ impl reqwest::dns::Resolve for FilteredDnsResolver { fn resolve(&self, name: reqwest::dns::Name) -> reqwest::dns::Resolving { let host = name.as_str().to_owned(); Box::pin(async move { - let addrs = tokio::net::lookup_host((host.as_str(), 0)).await?; + let addrs = spacetimedb_io::net::lookup_host((host.as_str(), 0)).await?; let filtered_addrs: Vec = addrs.filter(|addr| !is_blocked_ip(addr.ip())).collect(); if filtered_addrs.is_empty() { diff --git a/crates/core/src/util/jobs.rs b/crates/core/src/util/jobs.rs index b09f2db4d88..2ac4122d3ab 100644 --- a/crates/core/src/util/jobs.rs +++ b/crates/core/src/util/jobs.rs @@ -7,9 +7,9 @@ use futures::FutureExt; use indexmap::IndexMap; use smallvec::SmallVec; use spacetimedb_data_structures::map::HashMap; -use tokio::sync::{mpsc, oneshot, watch}; #[cfg(not(madsim))] use tokio::runtime; +use tokio::sync::{mpsc, oneshot, watch}; use tracing::Instrument; use crate::util::thread_scheduling::apply_compute_thread_hint; diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index 870781199f2..95348d1d509 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -37,7 +37,3 @@ 
spacetimedb-table.workspace = true tracing.workspace = true tracing-subscriber.workspace = true madsim = { path = "../../../../madsim/madsim" } - -[dev-dependencies] -axum.workspace = true -reqwest.workspace = true diff --git a/crates/dst/README.md b/crates/dst/README.md index 28974746cdf..e0b0a1452a3 100644 --- a/crates/dst/README.md +++ b/crates/dst/README.md @@ -1,56 +1,155 @@ # `spacetimedb-dst` -Deterministic simulation testing for SpacetimeDB targets. +Deterministic simulation testing for SpacetimeDB components. + +DST is not a generic random fuzzer. It is a seed-replayable framework for +generating meaningful SpacetimeDB histories, executing them against real +implementation paths, and checking semantic properties while the run is still +in progress. + +## First Principles + +- A failing run must be reproducible from target, scenario, seed, run budget, + and fault profile. Use `--max-interactions` for exact replay; `--duration` is + a wall-clock soak budget and may stop at a different step count on another + machine or runtime. +- Workloads describe legal but stressful user behavior. They should not depend + on target internals. +- Targets execute interactions against real SpacetimeDB code. +- Properties check externally observable behavior, preferably against a simple + model or a replayed durable history. +- Generation, execution, and property checking stay separate so failures are + diagnosable as workload bugs, target bugs, or weak assertions. +- Runs stream interactions instead of materializing a full plan by default. +- Fault injection is explicit, configurable, and summarized in the outcome. +- Shared probability and weighting logic belongs in `workload::strategy`, not + ad hoc scenario code. + +## Current Architecture + +The CLI selects a target, scenario, seed, budget, and fault profile. The shared +runner pulls one interaction at a time from a source, sends it to the target, +and asks the property runtime to observe the result. 
+ +```text +CLI -> TargetDescriptor -> NextInteractionSource -> TargetEngine -> Observation + \-> StreamingProperties -> Outcome +``` -## How DST Works +The core contracts are: -DST is CLI-first and interaction-stream based: +- `NextInteractionSource`: deterministic pull-based interaction stream. +- `TargetEngine`: target-specific execution and outcome collection. +- `StreamingProperties`: reusable property checks over observations and target + accessors. -1. CLI picks `target`, `scenario`, `seed`, and run budget. -2. A workload generator emits `next_interaction()` deterministically. -3. The target engine executes each interaction on a real implementation. -4. Target properties validate behavior during the run and at finish. -5. Run stops on first failure or budget expiry (`--duration` / `--max-interactions`). +## Workload Composition -There is no case materialization/replay path in the current crate. All runs are -generated and executed as a deterministic stream. +DST workloads use three building blocks: -## Current Targets +- **Source:** emits a deterministic stream of interactions. +- **Profile:** configures weights, schema shape, and generation policy. +- **Layer:** wraps a source and adds lifecycle, fault, or cross-cutting + interactions. + +`table_ops` is the base table-transaction workload. `commitlog_ops` composes it +and injects durability lifecycle operations such as sync, close/reopen, dynamic +table create/migrate/drop, and replay checks. `module_ops` drives standalone +host/module interactions. + +Use this rule of thumb: -- `relational-db-commitlog` -- `standalone-host` +- Add a new profile when the interaction language is unchanged and only weights + or schema shape differ. +- Add a new layer when you are adding lifecycle behavior around an existing + source. +- Add a new workload family only when the interaction vocabulary is genuinely + different. -Both targets reuse shared workload families and share the same streaming runner. 
+## Table Operation Semantics -## Workload Families +The table workload intentionally distinguishes similar-looking operations: + +- `ExactDuplicateInsert`: reinserts a full row that is already visible. For + RelationalDB set semantics, this should be an idempotent no-op. +- `UniqueKeyConflictInsert`: inserts a row with an existing primary id but a + different non-key payload. This should fail with `UniqueConstraintViolation`. +- `DeleteMissing`: deleting an absent row should report no mutation. +- `BeginTxConflict` / `WriteConflictInsert`: expected write-lock failures. +- Query operations (`PointLookup`, `PredicateCount`, `RangeScan`, `FullScan`) + are metamorphic/model oracles, not mutations. + +Keeping these cases separate matters: an exact duplicate and a unique-key +conflict exercise different datastore semantics. + +## Current Targets -- `workload/table_ops`: transactional table operations (create schema, insert, - delete, begin/commit/rollback patterns). -- `workload/commitlog_ops`: composes `table_ops` and injects lifecycle/chaos - operations (sync/close-reopen/dynamic-table ops) for commitlog durability - testing. +- `relational-db-commitlog`: runs table and commitlog lifecycle interactions + against `RelationalDB`, local durability, dynamic schema operations, + close/reopen, and replay-from-history checks. +- `standalone-host`: runs generated module interactions against a standalone + host environment. + +Both targets reuse shared workload families and the same streaming runner. ## Properties -Properties are target-owned and reusable across targets via -`targets/properties.rs`. A target chooses which property kinds to enable and -applies them through a shared `PropertyRuntime`. +Properties live in `targets/properties.rs` and are selected by target. +Table-oriented properties use `TargetPropertyAccess` so the property runtime can +ask a target for rows, counts, lookups, and range scans without knowing target +storage internals. 
+ +Current property families include: + +- insert/select and delete/select checks +- expected error matching +- point lookup, predicate count, range scan, and full scan vs `ExpectedModel` +- NoREC-style optimizer-vs-direct checks +- TLP-style true/false/null partition checks +- index range exclusion checks +- banking mirror-table invariants +- dynamic migration auto-increment checks +- durable replay state vs the expected committed model + +## Fault Injection -Examples: +`relational-db-commitlog` can wrap the in-memory commitlog repo in +`BuggifiedRepo`. Fault decisions are deterministic under madsim and summarized +in the final outcome. -- `PQS::InsertSelect` -- `DeleteSelect` -- `NoREC::SelectSelectOptimizer` -- `TLP::WhereTrueFalseNull` -- `IndexRangeExcluded` -- `BankingTablesMatch` +Profiles: -## CLI +- `off`: no injected disk behavior. +- `light`: latency and occasional short I/O. +- `default`: stronger latency and short I/O pressure. +- `aggressive`: higher latency and short I/O rates. I/O error hooks exist but + are currently disabled in profile-driven runs because local durability does + not yet classify those errors as recoverable target outcomes. 
+ +## Running + +Fast local run: + +```bash +cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --seed 42 --max-interactions 200 +``` + +Scenario examples: ```bash cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario banking --duration 5m cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario indexed-ranges --duration 5m -cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --seed 42 --max-interactions 2000 +cargo run -p spacetimedb-dst -- run --target standalone-host --scenario host-smoke --max-interactions 100 +``` + +madsim run with commitlog faults: + +```bash +RUSTFLAGS='--cfg madsim' cargo run -p spacetimedb-dst -- run \ + --target relational-db-commitlog \ + --seed 42 \ + --max-interactions 400 \ + --commitlog-fault-profile default ``` Trace every interaction: @@ -59,13 +158,50 @@ Trace every interaction: RUST_LOG=trace cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --duration 5m ``` +## Run Budgets + +Prefer `--max-interactions` when reporting or replaying a failure. It is the +deterministic interaction budget, so target, scenario, seed, interaction count, +and fault profile are enough to rerun the same generated stream. + +Use `--duration` for local soaks. It is intentionally wall-clock based, so it +can stop after a different number of interactions if host speed, logging, or +runtime behavior changes. + +## Reading The Code + +Start here: + +- `src/core/mod.rs`: source, engine, property, and runner traits. +- `src/workload/table_ops`: table interaction language, generation model, and + scenarios. +- `src/workload/commitlog_ops`: lifecycle layer over table workloads. +- `src/targets/properties.rs`: property catalog and expected model checks. +- `src/targets/relational_db_commitlog.rs`: target adapter for RelationalDB, + commitlog durability, fault injection, close/reopen, and replay. 
+- `src/targets/buggified_repo.rs`: deterministic disk-like fault layer. + ## Adding A New Target 1. Add a target engine in `src/targets/<target>.rs`. 2. Reuse an existing workload family or add `src/workload/<family>/`. -3. Plug target-specific properties through `PropertyRuntime`. -4. Add a `TargetDescriptor` in `src/targets/descriptor.rs`. -5. Register in CLI `TargetKind`. - -Use `table_ops` when semantics are table-transaction oriented. Add a new -workload family when you need lifecycle/network/replication semantics. +3. Return observations that are rich enough for properties to validate behavior. +4. Plug target-specific properties through `PropertyRuntime`. +5. Add a `TargetDescriptor` in `src/targets/descriptor.rs`. +6. Register the target in CLI `TargetKind`. + +## Current Gaps + +- No structured trace/replay format yet. +- No shrinker yet; seed replay is the current reproduction mechanism. +- Sometimes-property reporting is still outcome-counter based, not a stable + property-event catalog. +- madsim is used for current deterministic runtime/fault hooks; deeper + host/network/filesystem simulation still needs explicit runtime and IO + boundaries. +- The current `RelationalDB` target drives open read snapshots to release before + starting writes, because beginning a write behind an open read snapshot can + block in this target shape. Interleaved read/write snapshot histories should + come back once the target models that lock behavior explicitly. +- Current madsim builds still expose runtime-boundary gaps, including + `spawn_blocking` call sites and randomized standard `HashMap` state warnings. diff --git a/crates/dst/src/config.rs b/crates/dst/src/config.rs index 10c2fe3abf9..5968c5abb96 100644 --- a/crates/dst/src/config.rs +++ b/crates/dst/src/config.rs @@ -1,14 +1,58 @@ //! Shared run-budget configuration for DST targets.
-use std::time::{Duration, Instant}; +use std::{ + fmt, + time::{Duration, Instant}, +}; + +/// Coarse disk-fault profile for commitlog-backed DST targets. +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +pub enum CommitlogFaultProfile { + Off, + Light, + #[default] + Default, + Aggressive, +} + +impl fmt::Display for CommitlogFaultProfile { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Off => f.write_str("off"), + Self::Light => f.write_str("light"), + Self::Default => f.write_str("default"), + Self::Aggressive => f.write_str("aggressive"), + } + } +} /// Common stop conditions for generated DST runs. -#[derive(Clone, Debug, Default, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct RunConfig { /// Hard cap on generated interactions. `None` means no interaction budget. + /// + /// This is the preferred budget for exact seed replay: the same target, + /// scenario, seed, max-interactions value, and fault profile should produce + /// the same generated interaction stream. pub max_interactions: Option, /// Wall-clock duration budget in milliseconds. `None` means no time budget. + /// + /// Duration runs are useful as local soaks, but the exact stop step can vary + /// with host speed and runtime behavior. Use `max_interactions` when a + /// failure needs precise replay. pub max_duration_ms: Option, + /// Disk-fault profile for commitlog-backed targets. 
+ pub commitlog_fault_profile: CommitlogFaultProfile, +} + +impl Default for RunConfig { + fn default() -> Self { + Self { + max_interactions: None, + max_duration_ms: None, + commitlog_fault_profile: CommitlogFaultProfile::Default, + } + } } impl RunConfig { @@ -16,6 +60,7 @@ impl RunConfig { Self { max_interactions: Some(max_interactions), max_duration_ms: None, + ..Default::default() } } @@ -23,9 +68,20 @@ impl RunConfig { Ok(Self { max_interactions: None, max_duration_ms: Some(parse_duration_spec(duration)?.as_millis() as u64), + ..Default::default() }) } + pub fn with_commitlog_fault_profile(mut self, profile: CommitlogFaultProfile) -> Self { + self.commitlog_fault_profile = profile; + self + } + + /// Return the wall-clock deadline for duration-budgeted runs. + /// + /// This intentionally uses `std::time::Instant`, not simulated time. DST + /// duration budgets are a harness stop condition rather than part of the + /// simulated system under test. pub fn deadline(&self) -> Option { self.max_duration_ms .map(Duration::from_millis) diff --git a/crates/dst/src/core/mod.rs b/crates/dst/src/core/mod.rs index 8f51f749e84..34e8a75e85c 100644 --- a/crates/dst/src/core/mod.rs +++ b/crates/dst/src/core/mod.rs @@ -1,6 +1,8 @@ //! Core abstractions for pluggable DST workloads, engines, and properties. -use crate::{config::RunConfig, seed::DstSeed}; +use std::future::Future; + +use crate::config::RunConfig; /// Pull-based deterministic interaction source. pub trait NextInteractionSource { @@ -10,25 +12,18 @@ pub trait NextInteractionSource { fn request_finish(&mut self); } -/// A workload plan executed on-demand through `next_interaction`. -pub trait WorkloadPlan { - type Interaction: Clone + Send + Sync + 'static; - fn next_interactions( - &self, - seed: DstSeed, - cfg: RunConfig, - ) -> Box>; -} - /// Target execution contract over a workload interaction stream. 
pub trait TargetEngine { type Observation; type Outcome; type Error; - async fn execute_interaction(&mut self, interaction: &I) -> Result; + fn execute_interaction<'a>( + &'a mut self, + interaction: &'a I, + ) -> impl Future> + 'a; fn finish(&mut self); - fn collect_outcome(&mut self) -> anyhow::Result; + fn collect_outcome<'a>(&'a mut self) -> impl Future> + 'a; } /// Property runtime contract for the shared streaming runner. @@ -53,6 +48,9 @@ where E: TargetEngine, P: StreamingProperties, { + // Duration is a harness-level wall-clock stop condition. The reproducible + // budget for exact replay is `RunConfig::max_interactions`, which the + // source uses when it is constructed. let deadline = cfg.deadline(); let mut step = 0usize; loop { @@ -72,7 +70,7 @@ where step = step.saturating_add(1); } engine.finish(); - let outcome = engine.collect_outcome()?; + let outcome = engine.collect_outcome().await?; properties .finish(&engine, &outcome) .map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?; diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index dcfe5e91c06..aa556ac000b 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -6,6 +6,29 @@ //! - [`seed`] for deterministic seeds, //! - [`workload`] for scenario identifiers, //! - [`targets`] for executable relational-db / standalone-host adapters. +//! +//! ## DST principles +//! +//! 1. Every generated choice comes from [`seed::DstSeed`] or a simulator-provided +//! deterministic source. A failing run should be replayable from the printed +//! seed and CLI arguments. Use `--max-interactions` for exact replay; duration +//! budgets are wall-clock soak limits. +//! 2. Workloads describe legal but stressful user behavior. Targets may add +//! faults and lifecycle disruption, but the generator should not depend on +//! target internals. +//! 3. Oracles should check observable state, not merely absence of panics. When +//! 
possible, compare the target against a simple model or a replayed durable +//! history. +//! 4. Keep generation, execution, and property checking separate. This makes it +//! clear whether a failure came from an invalid workload, a target bug, or a +//! weak assertion. +//! 5. Prefer streaming state machines over precomputed traces. DST runs should +//! scale by budget and duration without materializing the whole workload. +//! 6. Fault injection must be explicit, configurable, and summarized in the run +//! output. Profiles should start with recoverable API-level behavior before +//! introducing crash or corruption semantics. +//! 7. Shared randomness, weighting, and sampling helpers belong in the +//! workload strategy module, not in ad hoc target or scenario code. /// Shared run-budget configuration for DST targets. pub mod config; diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index e7a7961b11b..30327001368 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -5,7 +5,7 @@ use std::{ use clap::{Args, Parser, Subcommand, ValueEnum}; use spacetimedb_dst::{ - config::RunConfig, + config::{CommitlogFaultProfile, RunConfig}, seed::DstSeed, targets::descriptor::{RelationalDbCommitlogDescriptor, StandaloneHostDescriptor, TargetDescriptor}, workload::{module_ops::HostScenarioId, table_ops::TableScenarioId}, @@ -36,12 +36,22 @@ struct TargetArgs { struct RunArgs { #[command(flatten)] target: TargetArgs, - #[arg(long)] + #[arg(long, help = "Seed for generated choices. Defaults to wall-clock time.")] seed: Option, - #[arg(long)] + #[arg( + long, + help = "Wall-clock soak budget such as 500ms, 10s, 5m, or 1h. Use --max-interactions for exact replay." + )] duration: Option, - #[arg(long)] + #[arg(long, help = "Deterministic interaction budget. 
Preferred for replayable failures.")] max_interactions: Option, + #[arg( + long, + value_enum, + default_value_t = CommitlogFaultProfileKind::Default, + help = "Commitlog disk-fault profile for commitlog-backed targets." + )] + commitlog_fault_profile: CommitlogFaultProfileKind, } #[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] @@ -58,6 +68,25 @@ enum ScenarioKind { HostSmoke, } +#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] +enum CommitlogFaultProfileKind { + Off, + Light, + Default, + Aggressive, +} + +impl From for CommitlogFaultProfile { + fn from(profile: CommitlogFaultProfileKind) -> Self { + match profile { + CommitlogFaultProfileKind::Off => Self::Off, + CommitlogFaultProfileKind::Light => Self::Light, + CommitlogFaultProfileKind::Default => Self::Default, + CommitlogFaultProfileKind::Aggressive => Self::Aggressive, + } + } +} + fn main() -> anyhow::Result<()> { init_tracing(); match Cli::parse().command { @@ -80,7 +109,11 @@ fn init_tracing() { fn run_command(args: RunArgs) -> anyhow::Result<()> { let seed = resolve_seed(args.seed); - let config = build_config(args.duration.as_deref(), args.max_interactions)?; + let config = build_config( + args.duration.as_deref(), + args.max_interactions, + args.commitlog_fault_profile, + )?; match args.target.target { TargetKind::RelationalDbCommitlog => { @@ -147,18 +180,25 @@ fn resolve_seed(seed: Option) -> DstSeed { }) } -fn build_config(duration: Option<&str>, max_interactions: Option) -> anyhow::Result { - match (duration, max_interactions) { - (Some(duration), Some(max_interactions)) => Ok(RunConfig { +fn build_config( + duration: Option<&str>, + max_interactions: Option, + commitlog_fault_profile: CommitlogFaultProfileKind, +) -> anyhow::Result { + let config = match (duration, max_interactions) { + (Some(duration), Some(max_interactions)) => RunConfig { max_interactions: Some(max_interactions), max_duration_ms: Some(spacetimedb_dst::config::parse_duration_spec(duration)?.as_millis() as u64), - 
}), - (Some(duration), None) => RunConfig::with_duration_spec(duration), - (None, Some(max_interactions)) => Ok(RunConfig::with_max_interactions(max_interactions)), - (None, None) => Ok(RunConfig::with_max_interactions(1_000)), - } + ..Default::default() + }, + (Some(duration), None) => RunConfig::with_duration_spec(duration)?, + (None, Some(max_interactions)) => RunConfig::with_max_interactions(max_interactions), + (None, None) => RunConfig::with_max_interactions(1_000), + }; + Ok(config.with_commitlog_fault_profile(commitlog_fault_profile.into())) } +#[allow(clippy::disallowed_macros)] async fn run_target( seed: DstSeed, scenario: D::Scenario, diff --git a/crates/dst/src/schema.rs b/crates/dst/src/schema.rs index 80349565828..ebce6c3a34b 100644 --- a/crates/dst/src/schema.rs +++ b/crates/dst/src/schema.rs @@ -59,6 +59,10 @@ pub fn generate_supported_type(rng: &mut DstRng) -> AlgebraicType { } pub fn generate_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { + if rng.index(5) == 0 { + return edge_value_for_type(rng, ty, idx); + } + match ty { AlgebraicType::Bool => AlgebraicValue::Bool(rng.index(2) == 0), AlgebraicType::I8 => AlgebraicValue::I8(((rng.next_u64() % 64) as i8) - 32), @@ -82,6 +86,77 @@ pub fn generate_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) } } +pub fn default_value_for_type(ty: &AlgebraicType) -> AlgebraicValue { + match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(false), + AlgebraicType::I8 => AlgebraicValue::I8(0), + AlgebraicType::U8 => AlgebraicValue::U8(0), + AlgebraicType::I16 => AlgebraicValue::I16(0), + AlgebraicType::U16 => AlgebraicValue::U16(0), + AlgebraicType::I32 => AlgebraicValue::I32(0), + AlgebraicType::U32 => AlgebraicValue::U32(0), + AlgebraicType::I64 => AlgebraicValue::I64(0), + AlgebraicType::U64 => AlgebraicValue::U64(0), + AlgebraicType::I128 => AlgebraicValue::I128(0.into()), + AlgebraicType::U128 => AlgebraicValue::U128(0.into()), + AlgebraicType::String => 
AlgebraicValue::String("".into()), + other => panic!("unsupported generated column type: {other:?}"), + } +} + +pub fn distinct_value_for_type(ty: &AlgebraicType, current: &AlgebraicValue) -> AlgebraicValue { + let default = default_value_for_type(ty); + if &default != current { + return default; + } + + match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(true), + AlgebraicType::I8 => AlgebraicValue::I8(1), + AlgebraicType::U8 => AlgebraicValue::U8(1), + AlgebraicType::I16 => AlgebraicValue::I16(1), + AlgebraicType::U16 => AlgebraicValue::U16(1), + AlgebraicType::I32 => AlgebraicValue::I32(1), + AlgebraicType::U32 => AlgebraicValue::U32(1), + AlgebraicType::I64 => AlgebraicValue::I64(1), + AlgebraicType::U64 => AlgebraicValue::U64(1), + AlgebraicType::I128 => AlgebraicValue::I128(1.into()), + AlgebraicType::U128 => AlgebraicValue::U128(1.into()), + AlgebraicType::String => AlgebraicValue::String("dst_unique_conflict".into()), + other => panic!("unsupported generated column type: {other:?}"), + } +} + +fn edge_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { + match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(rng.index(2) == 0), + AlgebraicType::I8 => [i8::MIN, -1, 0, 1, i8::MAX][rng.index(5)].into(), + AlgebraicType::U8 => [0, 1, u8::MAX][rng.index(3)].into(), + AlgebraicType::I16 => [i16::MIN, -1, 0, 1, i16::MAX][rng.index(5)].into(), + AlgebraicType::U16 => [0, 1, u16::MAX][rng.index(3)].into(), + AlgebraicType::I32 => [i32::MIN, -1, 0, 1, i32::MAX][rng.index(5)].into(), + AlgebraicType::U32 => [0, 1, u32::MAX][rng.index(3)].into(), + AlgebraicType::I64 => [i64::MIN, -1, 0, 1, i64::MAX][rng.index(5)].into(), + AlgebraicType::U64 => [0, 1, u64::MAX.saturating_sub(idx as u64)][rng.index(3)].into(), + AlgebraicType::I128 => { + let value = [i128::MIN, -1, 0, 1, i128::MAX][rng.index(5)]; + AlgebraicValue::I128(value.into()) + } + AlgebraicType::U128 => { + let value = [0, 1, u128::MAX][rng.index(3)]; + 
AlgebraicValue::U128(value.into()) + } + AlgebraicType::String => match rng.index(5) { + 0 => AlgebraicValue::String("".into()), + 1 => AlgebraicValue::String("same".into()), + 2 => AlgebraicValue::String("x".repeat(512).into()), + 3 => AlgebraicValue::String(format!("edge_{}", char::from_u32(0x2603).expect("valid char")).into()), + _ => AlgebraicValue::String(format!("v{idx}_edge").into()), + }, + other => panic!("unsupported generated column type: {other:?}"), + } +} + impl SimRow { pub fn to_product_value(&self) -> ProductValue { ProductValue::from_iter(self.values.iter().cloned()) diff --git a/crates/dst/src/targets/buggified_repo.rs b/crates/dst/src/targets/buggified_repo.rs index 0c277b677fa..e4bc4ed66f1 100644 --- a/crates/dst/src/targets/buggified_repo.rs +++ b/crates/dst/src/targets/buggified_repo.rs @@ -1,6 +1,10 @@ use std::{ fmt, io::{self, BufRead, Read, Seek, Write}, + sync::{ + atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, + Arc, + }, time::Duration, }; @@ -9,29 +13,139 @@ use spacetimedb_commitlog::{ segment::FileLike, }; -const LATENCY_PROBABILITY: f64 = 0.35; -const LONG_LATENCY_PROBABILITY: f64 = 0.08; -const SHORT_IO_PROBABILITY: f64 = 0.08; +use crate::{config::CommitlogFaultProfile, workload::commitlog_ops::DiskFaultSummary}; + +const INJECTED_DISK_ERROR_PREFIX: &str = "dst injected disk "; + +/// Returns true if `text` contains an error created by this fault layer. +pub(crate) fn is_injected_disk_error_text(text: &str) -> bool { + text.contains(INJECTED_DISK_ERROR_PREFIX) +} + +/// Configurable fault profile for a DST-only commitlog repository wrapper. 
+#[derive(Clone, Copy, Debug)] +pub(crate) struct CommitlogFaultConfig { + profile: CommitlogFaultProfile, + enabled: bool, + latency_prob: f64, + long_latency_prob: f64, + short_io_prob: f64, + read_error_prob: f64, + write_error_prob: f64, + flush_error_prob: f64, + fsync_error_prob: f64, + open_error_prob: f64, + metadata_error_prob: f64, + max_short_io_divisor: usize, +} + +impl CommitlogFaultConfig { + pub(crate) fn for_profile(profile: CommitlogFaultProfile) -> Self { + match profile { + CommitlogFaultProfile::Off => Self { + profile, + enabled: false, + latency_prob: 0.0, + long_latency_prob: 0.0, + short_io_prob: 0.0, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + }, + CommitlogFaultProfile::Light => Self { + profile, + enabled: true, + latency_prob: 0.20, + long_latency_prob: 0.04, + short_io_prob: 0.03, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + }, + CommitlogFaultProfile::Default => Self { + profile, + enabled: true, + latency_prob: 0.35, + long_latency_prob: 0.08, + short_io_prob: 0.08, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + }, + CommitlogFaultProfile::Aggressive => Self { + profile, + enabled: true, + latency_prob: 0.65, + long_latency_prob: 0.18, + short_io_prob: 0.20, + // The current local durability actor does not recover from I/O errors, + // so profile-driven runs stay with latency and short I/O. The counters + // and hooks stay here for targeted tests once the target can classify + // those failures instead of treating them as harness errors. 
+ read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 4, + }, + } + } + + pub(crate) fn enabled(&self) -> bool { + self.enabled + } +} /// DST-only repo wrapper that makes the in-memory commitlog backend behave less like RAM. /// -/// Faults stay within normal file API semantics: calls may take deterministic simulated time -/// and `Read` / `Write` may complete partially. The wrapper deliberately avoids corruption or -/// crash-style partial persistence; those need a stronger durability model before we enable them. +/// Faults stay within normal file API semantics: calls may take deterministic simulated time, +/// reads/writes may complete partially, and configured calls may return transient I/O errors. +/// The wrapper deliberately avoids corruption or crash-style partial persistence; those need a +/// stronger durability model before we enable them. #[derive(Clone, Debug)] pub(crate) struct BuggifiedRepo { inner: R, + faults: FaultController, } impl BuggifiedRepo { - pub(crate) fn new(inner: R) -> Self { - Self { inner } + pub(crate) fn new(inner: R, config: CommitlogFaultConfig) -> Self { + Self { + inner, + faults: FaultController::new(config), + } + } + + pub(crate) fn enable_faults(&self) { + self.faults.enable(); + } + + pub(crate) fn fault_summary(&self) -> DiskFaultSummary { + self.faults.summary() + } + + pub(crate) fn with_faults_suspended(&self, f: impl FnOnce() -> T) -> T { + self.faults.with_suspended(f) } } impl fmt::Display for BuggifiedRepo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}+buggified", self.inner) + write!(f, "{}+buggified({})", self.inner, self.faults.config.profile) } } @@ -40,18 +154,27 @@ impl Repo for BuggifiedRepo { type SegmentReader = BuggifiedReader; fn create_segment(&self, offset: u64) -> io::Result { - maybe_disk_latency(); - 
self.inner.create_segment(offset).map(BuggifiedSegment::new) + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Open)?; + self.inner + .create_segment(offset) + .map(|inner| BuggifiedSegment::new(inner, self.faults.clone())) } fn open_segment_reader(&self, offset: u64) -> io::Result { - maybe_disk_latency(); - self.inner.open_segment_reader(offset).map(BuggifiedReader::new) + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Open)?; + self.inner + .open_segment_reader(offset) + .map(|inner| BuggifiedReader::new(inner, self.faults.clone())) } fn open_segment_writer(&self, offset: u64) -> io::Result { - maybe_disk_latency(); - self.inner.open_segment_writer(offset).map(BuggifiedSegment::new) + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Open)?; + self.inner + .open_segment_writer(offset) + .map(|inner| BuggifiedSegment::new(inner, self.faults.clone())) } fn segment_file_path(&self, offset: u64) -> Option { @@ -59,32 +182,38 @@ impl Repo for BuggifiedRepo { } fn remove_segment(&self, offset: u64) -> io::Result<()> { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Metadata)?; self.inner.remove_segment(offset) } fn compress_segment(&self, offset: u64) -> io::Result<()> { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Metadata)?; self.inner.compress_segment(offset) } fn existing_offsets(&self) -> io::Result> { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Metadata)?; self.inner.existing_offsets() } fn create_offset_index(&self, offset: TxOffset, cap: u64) -> io::Result { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Metadata)?; self.inner.create_offset_index(offset, cap) } fn remove_offset_index(&self, offset: TxOffset) -> io::Result<()> { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + 
self.faults.maybe_error(FaultKind::Metadata)?; self.inner.remove_offset_index(offset) } fn get_offset_index(&self, offset: TxOffset) -> io::Result { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Metadata)?; self.inner.get_offset_index(offset) } } @@ -93,82 +222,92 @@ impl RepoWithoutLockFile for BuggifiedRepo {} pub(crate) struct BuggifiedSegment { inner: S, + faults: FaultController, } impl BuggifiedSegment { - fn new(inner: S) -> Self { - Self { inner } + fn new(inner: S, faults: FaultController) -> Self { + Self { inner, faults } } } impl Read for BuggifiedSegment { fn read(&mut self, buf: &mut [u8]) -> io::Result { - maybe_disk_latency(); - let len = maybe_short_len(buf.len()); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Read)?; + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); self.inner.read(&mut buf[..len]) } } impl Write for BuggifiedSegment { fn write(&mut self, buf: &[u8]) -> io::Result { - maybe_disk_latency(); - let len = maybe_short_len(buf.len()); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Write)?; + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Write); self.inner.write(&buf[..len]) } fn flush(&mut self) -> io::Result<()> { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Flush)?; self.inner.flush() } } impl Seek for BuggifiedSegment { fn seek(&mut self, pos: io::SeekFrom) -> io::Result { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); self.inner.seek(pos) } } impl SegmentLen for BuggifiedSegment { fn segment_len(&mut self) -> io::Result { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Metadata)?; self.inner.segment_len() } } impl FileLike for BuggifiedSegment { fn fsync(&mut self) -> io::Result<()> { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + 
self.faults.maybe_error(FaultKind::Fsync)?; self.inner.fsync() } fn ftruncate(&mut self, tx_offset: u64, size: u64) -> io::Result<()> { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Metadata)?; self.inner.ftruncate(tx_offset, size) } } pub(crate) struct BuggifiedReader { inner: S, + faults: FaultController, } impl BuggifiedReader { - fn new(inner: S) -> Self { - Self { inner } + fn new(inner: S, faults: FaultController) -> Self { + Self { inner, faults } } } impl Read for BuggifiedReader { fn read(&mut self, buf: &mut [u8]) -> io::Result { - maybe_disk_latency(); - let len = maybe_short_len(buf.len()); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Read)?; + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); self.inner.read(&mut buf[..len]) } } impl BufRead for BuggifiedReader { fn fill_buf(&mut self) -> io::Result<&[u8]> { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Read)?; self.inner.fill_buf() } @@ -179,14 +318,15 @@ impl BufRead for BuggifiedReader { impl Seek for BuggifiedReader { fn seek(&mut self, pos: io::SeekFrom) -> io::Result { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); self.inner.seek(pos) } } impl SegmentLen for BuggifiedReader { fn segment_len(&mut self) -> io::Result { - maybe_disk_latency(); + self.faults.maybe_disk_latency(); + self.faults.maybe_error(FaultKind::Metadata)?; self.inner.segment_len() } } @@ -197,41 +337,187 @@ impl SegmentReader for BuggifiedReader { } } -fn maybe_disk_latency() { - #[cfg(madsim)] - { - if madsim::buggify::buggify_with_prob(LATENCY_PROBABILITY) { - let latency = if madsim::buggify::buggify_with_prob(LONG_LATENCY_PROBABILITY) { +#[derive(Clone, Debug)] +struct FaultController { + config: CommitlogFaultConfig, + counters: Arc, + armed: Arc, + suspended: Arc, +} + +impl FaultController { + fn new(config: CommitlogFaultConfig) -> Self { + Self { + config, + 
counters: Arc::default(), + armed: Arc::new(AtomicBool::new(false)), + suspended: Arc::default(), + } + } + + fn enable(&self) { + self.armed.store(true, Ordering::Relaxed); + } + + fn active(&self) -> bool { + self.config.enabled() && self.armed.load(Ordering::Relaxed) && self.suspended.load(Ordering::Relaxed) == 0 + } + + fn with_suspended(&self, f: impl FnOnce() -> T) -> T { + self.suspended.fetch_add(1, Ordering::Relaxed); + let _guard = SuspendFaultsGuard { + suspended: self.suspended.clone(), + }; + f() + } + + fn maybe_disk_latency(&self) { + if self.sample(self.config.latency_prob) { + self.counters.latency.fetch_add(1, Ordering::Relaxed); + let latency = if self.sample(self.config.long_latency_prob) { Duration::from_millis(25) } else { Duration::from_millis(1) }; + #[cfg(madsim)] madsim::time::advance(latency); + #[cfg(not(madsim))] + let _ = latency; } } - #[cfg(not(madsim))] - { - let _ = (LATENCY_PROBABILITY, LONG_LATENCY_PROBABILITY, Duration::ZERO); + fn maybe_error(&self, kind: FaultKind) -> io::Result<()> { + if self.sample(kind.probability(&self.config)) { + kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed); + return Err(io::Error::other(kind.message())); + } + Ok(()) } + + fn maybe_short_len(&self, len: usize, kind: ShortIoKind) -> usize { + if len <= 1 { + return len; + } + if !self.sample(self.config.short_io_prob) { + return len; + } + + kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed); + let divisor = self.config.max_short_io_divisor.max(2); + (len / divisor).max(1) + } + + fn sample(&self, probability: f64) -> bool { + if !self.active() || probability <= 0.0 { + return false; + } + + #[cfg(madsim)] + { + madsim::buggify::buggify_with_prob(probability) + } + #[cfg(not(madsim))] + { + let _ = probability; + false + } + } + + fn summary(&self) -> DiskFaultSummary { + DiskFaultSummary { + profile: self.config.profile, + latency: self.counters.latency.load(Ordering::Relaxed) as usize, + short_read: 
self.counters.short_read.load(Ordering::Relaxed) as usize, + short_write: self.counters.short_write.load(Ordering::Relaxed) as usize, + read_error: self.counters.read_error.load(Ordering::Relaxed) as usize, + write_error: self.counters.write_error.load(Ordering::Relaxed) as usize, + flush_error: self.counters.flush_error.load(Ordering::Relaxed) as usize, + fsync_error: self.counters.fsync_error.load(Ordering::Relaxed) as usize, + open_error: self.counters.open_error.load(Ordering::Relaxed) as usize, + metadata_error: self.counters.metadata_error.load(Ordering::Relaxed) as usize, + } + } +} + +struct SuspendFaultsGuard { + suspended: Arc, } -fn maybe_short_len(len: usize) -> usize { - if len <= 1 { - return len; +impl Drop for SuspendFaultsGuard { + fn drop(&mut self) { + self.suspended.fetch_sub(1, Ordering::Relaxed); } +} + +#[derive(Debug, Default)] +struct FaultCounters { + latency: AtomicU64, + short_read: AtomicU64, + short_write: AtomicU64, + read_error: AtomicU64, + write_error: AtomicU64, + flush_error: AtomicU64, + fsync_error: AtomicU64, + open_error: AtomicU64, + metadata_error: AtomicU64, +} - #[cfg(madsim)] - { - if madsim::buggify::buggify_with_prob(SHORT_IO_PROBABILITY) { - return (len / 2).max(1); +#[derive(Clone, Copy)] +enum ShortIoKind { + Read, + Write, +} + +impl ShortIoKind { + fn counter(self, counters: &FaultCounters) -> &AtomicU64 { + match self { + Self::Read => &counters.short_read, + Self::Write => &counters.short_write, } } +} - #[cfg(not(madsim))] - { - let _ = SHORT_IO_PROBABILITY; +#[derive(Clone, Copy)] +enum FaultKind { + Read, + Write, + Flush, + Fsync, + Open, + Metadata, +} + +impl FaultKind { + fn probability(self, config: &CommitlogFaultConfig) -> f64 { + match self { + Self::Read => config.read_error_prob, + Self::Write => config.write_error_prob, + Self::Flush => config.flush_error_prob, + Self::Fsync => config.fsync_error_prob, + Self::Open => config.open_error_prob, + Self::Metadata => config.metadata_error_prob, + } } - 
len + fn counter(self, counters: &FaultCounters) -> &AtomicU64 { + match self { + Self::Read => &counters.read_error, + Self::Write => &counters.write_error, + Self::Flush => &counters.flush_error, + Self::Fsync => &counters.fsync_error, + Self::Open => &counters.open_error, + Self::Metadata => &counters.metadata_error, + } + } + + fn message(self) -> &'static str { + match self { + Self::Read => "dst injected disk read error", + Self::Write => "dst injected disk write error", + Self::Flush => "dst injected disk flush error", + Self::Fsync => "dst injected disk fsync error", + Self::Open => "dst injected disk open error", + Self::Metadata => "dst injected disk metadata error", + } + } } diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index 2e179a7026f..39c4f29d7ab 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -33,58 +33,98 @@ impl TargetDescriptor for RelationalDbCommitlogDescriptor { let outcome = crate::targets::relational_db_commitlog::run_generated_with_config_and_scenario(seed, scenario, config) .await?; - let alive_tasks = outcome - .runtime - .runtime_alive_tasks - .map(|count| count.to_string()) - .unwrap_or_else(|| "unknown".to_string()); - Ok(format!( - "ok target={} seed={} steps={} schema_tables={} schema_columns={} schema_max_columns={} schema_indexes={} schema_extra_indexes={} durable_commits={} replay_tables={} table_ops={} creates={} drops={} migrates={} syncs={} reopens={} reopen_skipped={} skipped={} op_begin={} op_commit={} op_rollback={} op_insert={} op_delete={} op_dup_insert={} op_missing_delete={} op_batch_insert={} op_batch_delete={} op_reinsert={} op_point_lookup={} op_predicate_count={} op_range_scan={} op_full_scan={} tx_begin={} tx_commit={} tx_rollback={} auto_commit={} read_tx={} known_tasks={} durability_actors={} alive_tasks={}", - Self::NAME, - seed.0, - outcome.applied_steps, - outcome.schema.initial_tables, - outcome.schema.initial_columns, 
- outcome.schema.max_columns_per_table, - outcome.schema.initial_indexes, - outcome.schema.extra_indexes, - outcome.durable_commit_count, - outcome.replay_table_count, - outcome.interactions.table, - outcome.interactions.create_dynamic_table, - outcome.interactions.drop_dynamic_table, - outcome.interactions.migrate_dynamic_table, - outcome.interactions.chaos_sync, - outcome.interactions.close_reopen_applied, - outcome.interactions.close_reopen_skipped, - outcome.interactions.skipped, - outcome.table_ops.begin_tx, - outcome.table_ops.commit_tx, - outcome.table_ops.rollback_tx, - outcome.table_ops.insert, - outcome.table_ops.delete, - outcome.table_ops.duplicate_insert, - outcome.table_ops.delete_missing, - outcome.table_ops.batch_insert, - outcome.table_ops.batch_delete, - outcome.table_ops.reinsert, - outcome.table_ops.point_lookup, - outcome.table_ops.predicate_count, - outcome.table_ops.range_scan, - outcome.table_ops.full_scan, - outcome.transactions.explicit_begin, - outcome.transactions.explicit_commit, - outcome.transactions.explicit_rollback, - outcome.transactions.auto_commit, - outcome.transactions.read_tx, - outcome.runtime.known_tokio_tasks_scheduled, - outcome.runtime.durability_actors_started, - alive_tasks - )) + Ok(format_relational_db_commitlog_outcome(Self::NAME, seed, &outcome)) }) } } +fn format_relational_db_commitlog_outcome( + target: &str, + seed: DstSeed, + outcome: &crate::targets::relational_db_commitlog::RelationalDbCommitlogOutcome, +) -> String { + let alive_tasks = outcome + .runtime + .runtime_alive_tasks + .map(|count| count.to_string()) + .unwrap_or_else(|| "unknown".to_string()); + + format!( + concat!( + "ok target={} seed={} steps={}\n", + "\n", + "schema: tables={} columns={} max_columns={} indexes={} extra_indexes={}\n", + "durability: durable_commits={} replay_tables={}\n", + "interactions: table={} creates={} drops={} migrates={} syncs={} reopens={} reopen_skipped={} skipped={}\n", + "table_ops:\n", + " tx_control: begin={} 
commit={} rollback={} begin_read={} release_read={} begin_conflict={} write_conflict={}\n", + " writes: insert={} delete={} exact_dup={} unique_conflict={} missing_delete={} batch_insert={} batch_delete={} reinsert={}\n", + " schema: add_column={} add_index={}\n", + " reads: point_lookup={} predicate_count={} range_scan={} full_scan={}\n", + "transactions: begin={} commit={} rollback={} auto_commit={} read_tx={}\n", + "disk_faults: profile={} latency={} short_read={} short_write={} errors(read={} write={} flush={} fsync={} open={} metadata={})\n", + "runtime: known_tasks={} durability_actors={} alive_tasks={}" + ), + target, + seed.0, + outcome.applied_steps, + outcome.schema.initial_tables, + outcome.schema.initial_columns, + outcome.schema.max_columns_per_table, + outcome.schema.initial_indexes, + outcome.schema.extra_indexes, + outcome.durable_commit_count, + outcome.replay_table_count, + outcome.interactions.table, + outcome.interactions.create_dynamic_table, + outcome.interactions.drop_dynamic_table, + outcome.interactions.migrate_dynamic_table, + outcome.interactions.chaos_sync, + outcome.interactions.close_reopen_applied, + outcome.interactions.close_reopen_skipped, + outcome.interactions.skipped, + outcome.table_ops.begin_tx, + outcome.table_ops.commit_tx, + outcome.table_ops.rollback_tx, + outcome.table_ops.begin_read_tx, + outcome.table_ops.release_read_tx, + outcome.table_ops.begin_tx_conflict, + outcome.table_ops.write_conflict_insert, + outcome.table_ops.insert, + outcome.table_ops.delete, + outcome.table_ops.exact_duplicate_insert, + outcome.table_ops.unique_key_conflict_insert, + outcome.table_ops.delete_missing, + outcome.table_ops.batch_insert, + outcome.table_ops.batch_delete, + outcome.table_ops.reinsert, + outcome.table_ops.add_column, + outcome.table_ops.add_index, + outcome.table_ops.point_lookup, + outcome.table_ops.predicate_count, + outcome.table_ops.range_scan, + outcome.table_ops.full_scan, + outcome.transactions.explicit_begin, + 
outcome.transactions.explicit_commit, + outcome.transactions.explicit_rollback, + outcome.transactions.auto_commit, + outcome.transactions.read_tx, + outcome.disk_faults.profile, + outcome.disk_faults.latency, + outcome.disk_faults.short_read, + outcome.disk_faults.short_write, + outcome.disk_faults.read_error, + outcome.disk_faults.write_error, + outcome.disk_faults.flush_error, + outcome.disk_faults.fsync_error, + outcome.disk_faults.open_error, + outcome.disk_faults.metadata_error, + outcome.runtime.known_tokio_tasks_scheduled, + outcome.runtime.durability_actors_started, + alive_tasks + ) +} + pub struct StandaloneHostDescriptor; impl TargetDescriptor for StandaloneHostDescriptor { diff --git a/crates/dst/src/targets/properties.rs b/crates/dst/src/targets/properties.rs index 773da80e104..0051bc6416f 100644 --- a/crates/dst/src/targets/properties.rs +++ b/crates/dst/src/targets/properties.rs @@ -11,7 +11,7 @@ use crate::{ core::StreamingProperties, schema::{SchemaPlan, SimRow}, workload::{ - commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome}, + commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary}, table_ops::{ ExpectedErrorKind, ExpectedModel, ExpectedResult, TableOperation, TableScenario, TableWorkloadInteraction, TableWorkloadOutcome, @@ -45,6 +45,7 @@ pub(crate) enum PropertyKind { IndexRangeExcluded, BankingTablesMatch, DynamicMigrationAutoInc, + DurableReplayMatchesModel, ExpectedErrorMatches, PointLookupMatchesModel, PredicateCountMatchesModel, @@ -112,6 +113,7 @@ pub(crate) enum CommitlogObservation { Applied, Skipped, DynamicMigrationProbe(DynamicMigrationProbe), + DurableReplay(DurableReplaySummary), } #[derive(Clone, Debug)] @@ -176,6 +178,7 @@ pub(crate) enum PropertyEvent<'a> { }, CommitOrRollback, DynamicMigrationProbe(&'a DynamicMigrationProbe), + DurableReplay(&'a DurableReplaySummary), TableWorkloadFinished(&'a TableWorkloadOutcome), } @@ -252,6 +255,9 @@ impl PropertyRuntime { 
PropertyKind::DynamicMigrationAutoInc => { rules.push(RuleEntry::new(*kind, Box::::default())) } + PropertyKind::DurableReplayMatchesModel => { + rules.push(RuleEntry::new(*kind, Box::::default())) + } PropertyKind::ExpectedErrorMatches => { rules.push(RuleEntry::new(*kind, Box::::default())) } @@ -279,8 +285,10 @@ impl PropertyRuntime { where S: TableScenario + 'static, { - let mut runtime = Self::default(); - runtime.models = PropertyModels::new(schema.tables.len(), num_connections); + let mut runtime = Self { + models: PropertyModels::new(schema.tables.len(), num_connections), + ..Self::default() + }; runtime .rules .push(RuleEntry::non_periodic(Box::new(ExpectedTableStateRule::new( @@ -295,15 +303,22 @@ impl PropertyRuntime { interaction: &TableWorkloadInteraction, ) -> Result<(), String> { match &interaction.op { - TableOperation::BeginTx { .. } | TableOperation::CommitTx { .. } | TableOperation::RollbackTx { .. } => { - self.models.apply(interaction) - } + TableOperation::BeginTx { .. } + | TableOperation::CommitTx { .. } + | TableOperation::RollbackTx { .. } + | TableOperation::BeginReadTx { .. } + | TableOperation::ReleaseReadTx { .. } => self.models.apply(interaction), TableOperation::BatchInsert { .. } | TableOperation::BatchDelete { .. } - | TableOperation::Reinsert { .. } => self.models.apply(interaction), + | TableOperation::Reinsert { .. } + | TableOperation::AddColumn { .. } + | TableOperation::AddIndex { .. } => self.models.apply(interaction), TableOperation::Insert { .. } | TableOperation::Delete { .. } - | TableOperation::DuplicateInsert { .. } + | TableOperation::BeginTxConflict { .. } + | TableOperation::WriteConflictInsert { .. } + | TableOperation::ExactDuplicateInsert { .. } + | TableOperation::UniqueKeyConflictInsert { .. } | TableOperation::DeleteMissing { .. } | TableOperation::PointLookup { .. } | TableOperation::PredicateCount { .. 
} @@ -456,6 +471,7 @@ impl PropertyRuntime { Ok(()) } + #[allow(clippy::too_many_arguments)] pub fn on_range_scan( &mut self, access: &dyn TargetPropertyAccess, @@ -531,6 +547,21 @@ impl PropertyRuntime { Ok(()) } + pub fn on_durable_replay( + &mut self, + access: &dyn TargetPropertyAccess, + replay: &DurableReplaySummary, + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe(&ctx, PropertyEvent::DurableReplay(replay))?; + } + Ok(()) + } + pub fn on_table_workload_finish( &mut self, access: &dyn TargetPropertyAccess, @@ -623,6 +654,7 @@ where self.observe_table_observation(engine, table_interaction, table_observation) } (_, CommitlogObservation::DynamicMigrationProbe(probe)) => self.on_dynamic_migration_probe(engine, probe), + (_, CommitlogObservation::DurableReplay(replay)) => self.on_durable_replay(engine, replay), (_, CommitlogObservation::Applied | CommitlogObservation::Skipped) => Ok(()), (other, observation) => Err(format!( "observation {observation:?} does not match interaction {other:?}" @@ -631,6 +663,7 @@ where } fn finish(&mut self, engine: &E, outcome: &CommitlogWorkloadOutcome) -> Result<(), String> { + self.on_durable_replay(engine, &outcome.replay)?; self.on_table_workload_finish(engine, &outcome.table) } } @@ -660,6 +693,7 @@ impl Default for PropertyRuntime { PropertyKind::IndexRangeExcluded, PropertyKind::BankingTablesMatch, PropertyKind::DynamicMigrationAutoInc, + PropertyKind::DurableReplayMatchesModel, PropertyKind::ExpectedErrorMatches, PropertyKind::PointLookupMatchesModel, PropertyKind::PredicateCountMatchesModel, @@ -960,6 +994,25 @@ impl PropertyRule for DynamicMigrationAutoIncRule { } } +#[derive(Default)] +struct DurableReplayMatchesModelRule; + +impl PropertyRule for DurableReplayMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::DurableReplay(replay) = 
event else { + return Ok(()); + }; + let expected_rows = ctx.models.table().committed_rows(); + if replay.base_rows != expected_rows { + return Err(format!( + "[DurableReplayMatchesModel] replayed durable state mismatch at offset {:?}: expected={expected_rows:?} actual={:?}", + replay.durable_offset, replay.base_rows + )); + } + Ok(()) + } +} + #[derive(Default)] struct ExpectedErrorMatchesRule; diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 1c3cdbc5b84..392e034aad0 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -4,7 +4,7 @@ use std::{cell::Cell, collections::BTreeMap, io, num::NonZeroU64, ops::Bound, pa use spacetimedb_commitlog::repo::{Memory as MemoryCommitlogRepo, SizeOnDisk}; use spacetimedb_core::{ - db::relational_db::{MutTx as RelMutTx, Persistence, RelationalDB}, + db::relational_db::{MutTx as RelMutTx, Persistence, RelationalDB, Tx as RelTx}, error::{DBError, DatastoreError, IndexError}, messages::control_db::HostType, }; @@ -28,16 +28,16 @@ use spacetimedb_table::page_pool::PagePool; use tracing::{debug, info, trace}; use crate::{ - config::RunConfig, + config::{CommitlogFaultProfile, RunConfig}, core::{self, TargetEngine}, schema::{SchemaPlan, SimRow}, seed::DstSeed, - targets::buggified_repo::BuggifiedRepo, + targets::buggified_repo::{is_injected_disk_error_text, BuggifiedRepo, CommitlogFaultConfig}, targets::properties::{ CommitlogObservation, DynamicMigrationProbe, PropertyRuntime, TableObservation, TargetPropertyAccess, }, workload::{ - commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome}, + commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary}, commitlog_ops::{InteractionSummary, RuntimeSummary, SchemaSummary, TableOperationSummary, TransactionSummary}, table_ops::{ ConnectionWriteState, ExpectedErrorKind, TableOperation, TableScenario, TableScenarioId, @@ 
-47,7 +47,7 @@ use crate::{ }; pub type RelationalDbCommitlogOutcome = CommitlogWorkloadOutcome; -type RelationalDbCommitlogSource = crate::workload::commitlog_ops::NextInteractionGeneratorComposite; +type RelationalDbCommitlogSource = crate::workload::commitlog_ops::CommitlogWorkloadSource; type RelationalDbCommitlogProperties = PropertyRuntime; pub async fn run_generated_with_config_and_scenario( @@ -79,14 +79,14 @@ fn build( let num_connections = connection_rng.index(3) + 1; let mut schema_rng = seed.fork(122).rng(); let schema = scenario.generate_schema(&mut schema_rng); - let generator = crate::workload::commitlog_ops::NextInteractionGeneratorComposite::new( + let generator = crate::workload::commitlog_ops::CommitlogWorkloadSource::new( seed, - scenario.clone(), + scenario, schema.clone(), num_connections, config.max_interactions_or_default(usize::MAX), ); - let engine = RelationalDbEngine::new(seed, &schema, num_connections)?; + let engine = RelationalDbEngine::new(seed, &schema, num_connections, config.commitlog_fault_profile)?; let properties = PropertyRuntime::for_table_workload(scenario, schema.clone(), num_connections); Ok((generator, engine, properties)) } @@ -139,7 +139,9 @@ impl RunStats { if matches!(interaction, CommitlogInteraction::CloseReopen) { match observation { CommitlogObservation::Skipped => self.interactions.close_reopen_skipped += 1, - CommitlogObservation::Applied => self.interactions.close_reopen_applied += 1, + CommitlogObservation::Applied | CommitlogObservation::DurableReplay(_) => { + self.interactions.close_reopen_applied += 1 + } _ => {} } } @@ -150,13 +152,20 @@ impl RunStats { TableOperation::BeginTx { .. } => self.table_ops.begin_tx += 1, TableOperation::CommitTx { .. } => self.table_ops.commit_tx += 1, TableOperation::RollbackTx { .. } => self.table_ops.rollback_tx += 1, + TableOperation::BeginReadTx { .. } => self.table_ops.begin_read_tx += 1, + TableOperation::ReleaseReadTx { .. 
} => self.table_ops.release_read_tx += 1, + TableOperation::BeginTxConflict { .. } => self.table_ops.begin_tx_conflict += 1, + TableOperation::WriteConflictInsert { .. } => self.table_ops.write_conflict_insert += 1, TableOperation::Insert { .. } => self.table_ops.insert += 1, TableOperation::Delete { .. } => self.table_ops.delete += 1, - TableOperation::DuplicateInsert { .. } => self.table_ops.duplicate_insert += 1, + TableOperation::ExactDuplicateInsert { .. } => self.table_ops.exact_duplicate_insert += 1, + TableOperation::UniqueKeyConflictInsert { .. } => self.table_ops.unique_key_conflict_insert += 1, TableOperation::DeleteMissing { .. } => self.table_ops.delete_missing += 1, TableOperation::BatchInsert { .. } => self.table_ops.batch_insert += 1, TableOperation::BatchDelete { .. } => self.table_ops.batch_delete += 1, TableOperation::Reinsert { .. } => self.table_ops.reinsert += 1, + TableOperation::AddColumn { .. } => self.table_ops.add_column += 1, + TableOperation::AddIndex { .. } => self.table_ops.add_index += 1, TableOperation::PointLookup { .. } => self.table_ops.point_lookup += 1, TableOperation::PredicateCount { .. } => self.table_ops.predicate_count += 1, TableOperation::RangeScan { .. 
} => self.table_ops.range_scan += 1, @@ -194,13 +203,13 @@ impl RunStats { struct RelationalDbEngine { db: Option, execution: ConnectionWriteState, + read_tx_by_connection: Vec>, base_schema: SchemaPlan, base_table_ids: Vec, dynamic_tables: BTreeMap, step: usize, + last_requested_durable_offset: Option, last_observed_durable_offset: Option, - last_durable_snapshot: DurableSnapshot, - pending_snapshot_capture: bool, durability: Arc, durability_opts: spacetimedb_durability::local::Options, runtime_handle: tokio::runtime::Handle, @@ -209,35 +218,39 @@ struct RelationalDbEngine { _runtime_guard: Option, } -type DurableSnapshot = BTreeMap>; - impl RelationalDbEngine { - fn new(seed: DstSeed, schema: &SchemaPlan, num_connections: usize) -> anyhow::Result { - let (db, runtime_handle, commitlog_repo, durability, durability_opts, runtime_guard) = - bootstrap_relational_db(seed.fork(700))?; + fn new( + seed: DstSeed, + schema: &SchemaPlan, + num_connections: usize, + fault_profile: CommitlogFaultProfile, + ) -> anyhow::Result { + let bootstrap = bootstrap_relational_db(seed.fork(700), fault_profile)?; let mut this = Self { - db: Some(db), + db: Some(bootstrap.db), execution: ConnectionWriteState::new(num_connections), + read_tx_by_connection: (0..num_connections).map(|_| None).collect(), base_schema: schema.clone(), base_table_ids: Vec::with_capacity(schema.tables.len()), dynamic_tables: BTreeMap::new(), step: 0, + last_requested_durable_offset: None, last_observed_durable_offset: None, - last_durable_snapshot: BTreeMap::new(), - pending_snapshot_capture: false, - durability, - durability_opts, - runtime_handle, - commitlog_repo, + durability: bootstrap.durability, + durability_opts: bootstrap.durability_opts, + runtime_handle: bootstrap.runtime_handle, + commitlog_repo: bootstrap.commitlog_repo, stats: RunStats { runtime: RuntimeStats { durability_actors_started: 1, }, ..Default::default() }, - _runtime_guard: runtime_guard, + _runtime_guard: bootstrap.runtime_guard, }; 
this.install_base_schema().map_err(anyhow::Error::msg)?; + this.refresh_observed_durable_offset(true).map_err(anyhow::Error::msg)?; + this.commitlog_repo.enable_faults(); Ok(this) } @@ -290,37 +303,43 @@ impl RelationalDbEngine { .map_err(|err| format!("create table '{}' failed: {err}", table.name))?; self.base_table_ids.push(table_id); } - self.db()? + let committed = self + .db()? .commit_tx(tx) - .map(|_| ()) - .map_err(|err| format!("install base schema commit failed: {err}")) + .map_err(|err| format!("install base schema commit failed: {err}"))?; + self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); + Ok(()) } async fn execute(&mut self, interaction: &CommitlogInteraction) -> Result { self.step = self.step.saturating_add(1); self.stats.record_interaction_requested(interaction); + let force_sync_after = matches!(interaction, CommitlogInteraction::ChaosSync); let observation = match interaction { CommitlogInteraction::Table(op) => self.execute_table_op(op).map(CommitlogObservation::Table), CommitlogInteraction::CreateDynamicTable { conn, slot } => self.create_dynamic_table(*conn, *slot), CommitlogInteraction::DropDynamicTable { conn, slot } => self.drop_dynamic_table(*conn, *slot), CommitlogInteraction::MigrateDynamicTable { conn, slot } => self.migrate_dynamic_table(*conn, *slot), - CommitlogInteraction::ChaosSync => { - self.sync_and_snapshot(true)?; - Ok(CommitlogObservation::Applied) - } + CommitlogInteraction::ChaosSync => Ok(CommitlogObservation::Applied), CommitlogInteraction::CloseReopen => self.close_and_reopen().await, }?; + if !matches!(interaction, CommitlogInteraction::CloseReopen) { + self.wait_for_requested_durability(force_sync_after).await?; + } self.stats.record_interaction_result(interaction, &observation); Ok(observation) } async fn close_and_reopen(&mut self) -> Result { - if self.execution.active_writer.is_some() || self.execution.tx_by_connection.iter().any(|tx| tx.is_some()) { + if 
self.execution.active_writer.is_some() + || self.execution.tx_by_connection.iter().any(|tx| tx.is_some()) + || self.read_tx_by_connection.iter().any(|tx| tx.is_some()) + { trace!("skip close/reopen while transaction is open"); return Ok(CommitlogObservation::Skipped); } - self.sync_and_snapshot(true)?; + self.wait_for_requested_durability(true).await?; // Explicitly drop the current RelationalDB instance before attempting // to open a new durability+DB pair on the same replica directory. let old_db = self @@ -331,6 +350,37 @@ impl RelationalDbEngine { drop(old_db); info!("starting in-memory durability"); + let (durability, db) = self.reopen_from_history_with_fault_retry("close/reopen")?; + + self.stats.runtime.durability_actors_started += 1; + self.durability = durability; + self.db = Some(db); + self.rebuild_table_handles_after_reopen()?; + self.last_observed_durable_offset = self.durability.durable_tx_offset().last_seen(); + let replay = self.durable_replay_summary()?; + debug!( + base_tables = self.base_table_ids.len(), + dynamic_tables = self.dynamic_tables.len(), + "reopened relational db from durable history" + ); + Ok(CommitlogObservation::DurableReplay(replay)) + } + + fn reopen_from_history_with_fault_retry( + &self, + context: &'static str, + ) -> Result<(Arc, RelationalDB), String> { + match self.reopen_from_history() { + Ok(reopened) => Ok(reopened), + Err(err) if is_injected_disk_error_text(&err) => { + trace!(error = %err, "retrying {context} with injected disk faults suspended"); + self.commitlog_repo.with_faults_suspended(|| self.reopen_from_history()) + } + Err(err) => Err(err), + } + } + + fn reopen_from_history(&self) -> Result<(Arc, RelationalDB), String> { let durability = Arc::new( InMemoryCommitlogDurability::open_with_repo( self.commitlog_repo.clone(), @@ -359,17 +409,7 @@ impl RelationalDbEngine { "unexpected connected clients after reopen: {connected_clients:?}" )); } - self.stats.runtime.durability_actors_started += 1; - self.durability = 
durability; - self.db = Some(db); - self.rebuild_table_handles_after_reopen()?; - self.capture_pending_snapshot_if_idle()?; - debug!( - base_tables = self.base_table_ids.len(), - dynamic_tables = self.dynamic_tables.len(), - "reopened relational db from durable history" - ); - Ok(CommitlogObservation::Applied) + Ok((durability, db)) } fn rebuild_table_handles_after_reopen(&mut self) -> Result<(), String> { @@ -426,6 +466,9 @@ impl RelationalDbEngine { match &interaction.op { TableOperation::BeginTx { conn } => { self.execution.ensure_known_connection(*conn)?; + if self.read_tx_by_connection[*conn].is_some() { + return Err(format!("connection {conn} already has open read transaction")); + } if self.execution.tx_by_connection[*conn].is_some() { return Err(format!("connection {conn} already has open transaction")); } @@ -442,17 +485,66 @@ impl RelationalDbEngine { self.stats.transactions.explicit_begin += 1; Ok(TableObservation::Applied) } + TableOperation::BeginReadTx { conn } => { + self.execution.ensure_known_connection(*conn)?; + if self.execution.tx_by_connection[*conn].is_some() { + return Err(format!("connection {conn} already has open write transaction")); + } + if self.read_tx_by_connection[*conn].is_some() { + return Err(format!("connection {conn} already has open read transaction")); + } + let tx = self.db()?.begin_tx(Workload::ForTests); + self.read_tx_by_connection[*conn] = Some(tx); + self.stats.record_read_tx(); + Ok(TableObservation::Applied) + } + TableOperation::ReleaseReadTx { conn } => { + self.execution.ensure_known_connection(*conn)?; + let tx = self.read_tx_by_connection[*conn] + .take() + .ok_or_else(|| format!("connection {conn} has no read transaction to release"))?; + let _ = self.db()?.release_tx(tx); + Ok(TableObservation::Applied) + } + TableOperation::BeginTxConflict { owner, conn } => { + self.expect_write_conflict(*owner, *conn)?; + Ok(TableObservation::ExpectedError(ExpectedErrorKind::WriteConflict)) + } + 
TableOperation::WriteConflictInsert { + owner, + conn, + table, + row, + } => { + self.expect_write_conflict(*owner, *conn)?; + let err = self + .with_mut_tx(*conn, |engine, tx| { + let table_id = engine.table_id_for_index(*table)?; + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + engine + .db()? + .insert(tx, table_id, &bsatn) + .map_err(|err| format!("conflicting insert unexpectedly reached datastore: {err}"))?; + Ok(()) + }) + .expect_err("active writer should reject conflicting auto-commit write"); + if !err.contains("owns lock") { + return Err(format!("write conflict returned wrong error: {err}")); + } + Ok(TableObservation::ExpectedError(ExpectedErrorKind::WriteConflict)) + } TableOperation::CommitTx { conn } => { self.execution.ensure_writer_owner(*conn, "commit")?; let tx = self.execution.tx_by_connection[*conn] .take() .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; - self.db()? + let committed = self + .db()? .commit_tx(tx) .map_err(|err| format!("commit interaction failed: {err}"))?; + self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); self.execution.active_writer = None; self.stats.transactions.explicit_commit += 1; - self.capture_pending_snapshot_if_idle()?; Ok(TableObservation::CommitOrRollback) } TableOperation::RollbackTx { conn } => { @@ -463,7 +555,6 @@ impl RelationalDbEngine { let _ = self.db()?.rollback_mut_tx(tx); self.execution.active_writer = None; self.stats.transactions.explicit_rollback += 1; - self.capture_pending_snapshot_if_idle()?; Ok(TableObservation::CommitOrRollback) } TableOperation::Insert { conn, table, row } => { @@ -481,7 +572,7 @@ impl RelationalDbEngine { Ok(SimRow::from_product_value(row_ref.to_product_value())) })?; if !in_tx { - self.sync_and_snapshot(false)?; + self.refresh_observed_durable_offset(false)?; } Ok(TableObservation::RowInserted { conn: *conn, @@ -504,7 +595,7 @@ impl RelationalDbEngine { Ok(()) })?; if !in_tx { - 
self.sync_and_snapshot(false)?; + self.refresh_observed_durable_offset(false)?; } Ok(TableObservation::RowDeleted { conn: *conn, @@ -513,7 +604,35 @@ impl RelationalDbEngine { in_tx, }) } - TableOperation::DuplicateInsert { conn, table, row } => { + TableOperation::ExactDuplicateInsert { conn, table, row } => { + let in_tx = self.execution.tx_by_connection[*conn].is_some(); + let before = self.collect_rows_in_connection(*conn, *table)?; + let inserted_row = self.with_mut_tx(*conn, |engine, tx| { + let table_id = engine.table_id_for_index(*table)?; + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + let (_, row_ref, _) = engine + .db()? + .insert(tx, table_id, &bsatn) + .map_err(|err| format!("exact duplicate insert failed: {err}"))?; + Ok(SimRow::from_product_value(row_ref.to_product_value())) + })?; + if !in_tx { + self.refresh_observed_durable_offset(false)?; + } + let after = self.collect_rows_in_connection(*conn, *table)?; + if &inserted_row != row { + return Err(format!( + "[ExactDuplicateInsertNoOp] returned row mismatch: expected={row:?}, actual={inserted_row:?}; interaction={interaction:?}" + )); + } + if after != before { + return Err(format!( + "[ExactDuplicateInsertNoOp] changed visible rows: before={before:?}, after={after:?}; interaction={interaction:?}" + )); + } + Ok(TableObservation::Applied) + } + TableOperation::UniqueKeyConflictInsert { conn, table, row } => { let outcome = self.with_mut_tx(*conn, |engine, tx| { let table_id = *engine .base_table_ids @@ -521,10 +640,10 @@ impl RelationalDbEngine { .ok_or_else(|| format!("table {table} out of range"))?; let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; match engine.db()?.insert(tx, table_id, &bsatn) { - Ok(_) => Ok(Err("duplicate insert unexpectedly succeeded".to_string())), + Ok(_) => Ok(Err("unique-key conflict insert unexpectedly succeeded".to_string())), Err(err) if is_unique_constraint_violation(&err) => Ok(Ok(())), Err(err) => Ok(Err(format!( - "duplicate insert 
returned wrong error: expected={:?}, actual={err}", + "unique-key conflict insert returned wrong error: expected={:?}, actual={err}", ExpectedErrorKind::UniqueConstraintViolation ))), } @@ -569,7 +688,7 @@ impl RelationalDbEngine { Ok(()) })?; if !in_tx { - self.sync_and_snapshot(false)?; + self.refresh_observed_durable_offset(false)?; } Ok(TableObservation::Applied) } @@ -589,7 +708,7 @@ impl RelationalDbEngine { Ok(()) })?; if !in_tx { - self.sync_and_snapshot(false)?; + self.refresh_observed_durable_offset(false)?; } Ok(TableObservation::Applied) } @@ -612,10 +731,61 @@ impl RelationalDbEngine { Ok(()) })?; if !in_tx { - self.sync_and_snapshot(false)?; + self.refresh_observed_durable_offset(false)?; } Ok(TableObservation::Applied) } + TableOperation::AddColumn { + conn, + table, + column, + default, + } => { + let table_id = self.with_mut_tx(*conn, |engine, tx| { + let table_id = engine.table_id_for_index(*table)?; + let column_idx = engine.base_schema.tables[*table].columns.len() as u16; + let mut columns = engine.base_schema.tables[*table] + .columns + .iter() + .enumerate() + .map(|(idx, existing)| ColumnSchema::for_test(idx as u16, &existing.name, existing.ty.clone())) + .collect::>(); + columns.push(ColumnSchema::for_test(column_idx, &column.name, column.ty.clone())); + let new_table_id = engine + .db()? 
+ .add_columns_to_table(tx, table_id, columns, vec![default.clone()]) + .map_err(|err| format!("add column failed: {err}"))?; + Ok(new_table_id) + })?; + self.base_table_ids[*table] = table_id; + self.base_schema.tables[*table].columns.push(column.clone()); + self.refresh_observed_durable_offset(false)?; + Ok(TableObservation::Applied) + } + TableOperation::AddIndex { conn, table, cols } => { + self.with_mut_tx(*conn, |engine, tx| { + let table_id = engine.table_id_for_index(*table)?; + let mut schema = IndexSchema::for_test( + format!( + "{}_dst_added_{}_idx", + engine.base_schema.tables[*table].name, + engine.base_schema.tables[*table].extra_indexes.len() + ), + BTreeAlgorithm::from(cols.iter().copied().collect::()), + ); + schema.table_id = table_id; + engine + .db()? + .create_index(tx, schema, false) + .map_err(|err| format!("add index failed: {err}"))?; + Ok(()) + })?; + if !self.base_schema.tables[*table].extra_indexes.contains(cols) { + self.base_schema.tables[*table].extra_indexes.push(cols.clone()); + } + self.refresh_observed_durable_offset(false)?; + Ok(TableObservation::Applied) + } TableOperation::PointLookup { conn, table, id } => { let actual = self.lookup_base_row(*conn, *table, *id)?; Ok(TableObservation::PointLookup { @@ -674,6 +844,9 @@ impl RelationalDbEngine { mut f: impl FnMut(&mut Self, &mut RelMutTx) -> Result, ) -> Result { self.execution.ensure_known_connection(conn)?; + if self.read_tx_by_connection[conn].is_some() { + return Err(format!("connection {conn} cannot write while read transaction is open")); + } if self.execution.tx_by_connection[conn].is_some() { let mut tx = self.execution.tx_by_connection[conn] .take() @@ -694,15 +867,36 @@ impl RelationalDbEngine { .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); self.execution.active_writer = Some(conn); let value = f(self, &mut tx)?; - self.db()? + let committed = self + .db()? 
.commit_tx(tx) .map_err(|err| format!("auto-commit write failed: {err}"))?; + self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); self.execution.active_writer = None; self.stats.transactions.auto_commit += 1; - self.capture_pending_snapshot_if_idle()?; Ok(value) } + fn expect_write_conflict(&self, owner: usize, conn: usize) -> Result<(), String> { + self.execution.ensure_known_connection(owner)?; + self.execution.ensure_known_connection(conn)?; + if owner == conn { + return Err(format!("write conflict owner and contender are both connection {conn}")); + } + if self.execution.active_writer != Some(owner) { + return Err(format!( + "expected connection {owner} to own write lock, actual={:?}", + self.execution.active_writer + )); + } + if self.read_tx_by_connection[conn].is_some() { + return Err(format!( + "conflicting connection {conn} unexpectedly has a read transaction" + )); + } + Ok(()) + } + fn create_dynamic_table(&mut self, conn: usize, slot: u32) -> Result { if self.execution.active_writer.is_some() { trace!( @@ -742,7 +936,7 @@ impl RelationalDbEngine { ); Ok(()) })?; - self.sync_and_snapshot(false)?; + self.refresh_observed_durable_offset(false)?; Ok(CommitlogObservation::Applied) } @@ -769,7 +963,7 @@ impl RelationalDbEngine { } Ok(()) })?; - self.sync_and_snapshot(false)?; + self.refresh_observed_durable_offset(false)?; Ok(CommitlogObservation::Applied) } @@ -828,7 +1022,7 @@ impl RelationalDbEngine { inserted_row: inserted, })) })?; - self.sync_and_snapshot(false)?; + self.refresh_observed_durable_offset(false)?; Ok(probe .map(CommitlogObservation::DynamicMigrationProbe) .unwrap_or(CommitlogObservation::Skipped)) @@ -838,22 +1032,34 @@ impl RelationalDbEngine { self.execution.active_writer.unwrap_or(conn) } - fn sync_and_snapshot(&mut self, forced: bool) -> Result<(), String> { + fn refresh_observed_durable_offset(&mut self, forced: bool) -> Result<(), String> { let durable_offset = 
self.durability.durable_tx_offset().last_seen(); if forced || durable_offset != self.last_observed_durable_offset { self.last_observed_durable_offset = durable_offset; - self.pending_snapshot_capture = true; - self.capture_pending_snapshot_if_idle()?; } Ok(()) } - fn capture_pending_snapshot_if_idle(&mut self) -> Result<(), String> { - if self.pending_snapshot_capture && self.execution.active_writer.is_none() { - self.last_durable_snapshot = self.snapshot_tracked_tables()?; - self.pending_snapshot_capture = false; + async fn wait_for_requested_durability(&mut self, forced: bool) -> Result<(), String> { + if let Some(target_offset) = self.last_requested_durable_offset { + let current = self.durability.durable_tx_offset().last_seen(); + if current.is_none_or(|offset| offset < target_offset) { + self.durability + .durable_tx_offset() + .wait_for(target_offset) + .await + .map_err(|err| format!("durability wait for tx offset {target_offset} failed: {err}"))?; + } + } else if forced { + tokio::task::yield_now().await; + } + self.refresh_observed_durable_offset(forced) + } + + fn record_committed_offset(&mut self, offset: Option) { + if let Some(offset) = offset { + self.last_requested_durable_offset = Some(offset); } - Ok(()) } fn table_id_for_index(&self, table: usize) -> Result { @@ -872,6 +1078,13 @@ impl RelationalDbEngine { .map_err(|err| format!("in-tx lookup failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .next()) + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn) { + Ok(self + .db()? + .iter_by_col_eq(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("read-tx lookup failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next()) } else { let db = self.db()?; let tx = db.begin_tx(Workload::ForTests); @@ -898,6 +1111,15 @@ impl RelationalDbEngine { .collect::>(); rows.sort_by_key(|row| row.id().unwrap_or_default()); Ok(rows) + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn) { + let mut rows = self + .db()? + .iter(tx, table_id) + .map_err(|err| format!("read-tx scan failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) } else { self.collect_rows_by_id(table_id) } @@ -917,6 +1139,12 @@ impl RelationalDbEngine { .iter_by_col_eq_mut(tx, table_id, col, value) .map_err(|err| format!("in-tx predicate query failed: {err}"))? .count()) + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn) { + Ok(self + .db()? + .iter_by_col_eq(tx, table_id, col, value) + .map_err(|err| format!("read-tx predicate query failed: {err}"))? + .count()) } else { self.count_by_col_eq_for_property(table, col, value) } @@ -938,6 +1166,12 @@ impl RelationalDbEngine { .map_err(|err| format!("in-tx range scan failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .collect::>() + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn) { + self.db()? + .iter_by_col_range(tx, table_id, col_list, (lower, upper)) + .map_err(|err| format!("read-tx range scan failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>() } else { let db = self.db()?; let tx = db.begin_tx(Workload::ForTests); @@ -1020,46 +1254,63 @@ impl RelationalDbEngine { Ok(rows) } - fn snapshot_tracked_tables(&self) -> Result { - let mut snap = BTreeMap::new(); - for (idx, table_id) in self.base_table_ids.iter().enumerate() { - let name = self - .base_schema - .tables - .get(idx) - .map(|t| t.name.clone()) - .ok_or_else(|| format!("base table index {idx} missing schema"))?; - snap.insert(name, self.collect_rows_by_id(*table_id)?); - } - for state in self.dynamic_tables.values() { - let name = state.name.clone(); - snap.insert(name, self.collect_rows_by_id(state.table_id)?); - } - Ok(snap) + fn durable_replay_summary(&self) -> Result { + Ok(DurableReplaySummary { + durable_offset: self.last_observed_durable_offset, + base_rows: self.collect_base_rows()?, + dynamic_table_count: self.dynamic_tables.len(), + }) + } + + async fn reopen_for_final_replay_check(&mut self) -> Result { + let old_db = self + .db + .take() + .ok_or_else(|| "final replay check failed: relational db not initialized".to_string())?; + old_db.shutdown().await; + drop(old_db); + + let (durability, db) = self.reopen_from_history_with_fault_retry("final replay check")?; + self.stats.runtime.durability_actors_started += 1; + self.durability = durability; + self.db = Some(db); + self.rebuild_table_handles_after_reopen()?; + self.last_observed_durable_offset = self.durability.durable_tx_offset().last_seen(); + self.durable_replay_summary() } - fn collect_outcome(&mut self) -> Result { - self.capture_pending_snapshot_if_idle()?; - self.sync_and_snapshot(true)?; + async fn collect_outcome(&mut self) -> Result { + self.wait_for_requested_durability(true).await?; let table = self.collect_table_outcome()?; + let replay = self.reopen_for_final_replay_check().await?; let durable_commit_count = self .last_observed_durable_offset .map(|offset| (offset as 
usize).saturating_add(1)) .unwrap_or(0); + let replay_table_count = replay.base_rows.len() + replay.dynamic_table_count; debug!(durable_commits = durable_commit_count, "replayed durable prefix"); Ok(RelationalDbCommitlogOutcome { applied_steps: self.step, durable_commit_count, - replay_table_count: self.last_durable_snapshot.len(), + replay_table_count, schema: schema_summary(&self.base_schema), interactions: self.stats.interactions.clone(), table_ops: self.stats.table_ops.clone(), transactions: self.stats.transaction_summary(durable_commit_count), runtime: self.stats.runtime_summary(), + disk_faults: self.commitlog_repo.fault_summary(), + replay, table, }) } + fn collect_base_rows(&self) -> Result>, String> { + self.base_table_ids + .iter() + .map(|&table_id| self.collect_rows_by_id(table_id)) + .collect() + } + fn collect_table_outcome(&self) -> Result { let mut final_rows = Vec::with_capacity(self.base_table_ids.len()); let mut final_row_counts = Vec::with_capacity(self.base_table_ids.len()); @@ -1078,10 +1329,17 @@ impl RelationalDbEngine { fn finish(&mut self) { for tx in &mut self.execution.tx_by_connection { - if let Some(tx) = tx.take() { - if let Some(db) = &self.db { - let _ = db.rollback_mut_tx(tx); - } + if let Some(tx) = tx.take() + && let Some(db) = &self.db + { + let _ = db.rollback_mut_tx(tx); + } + } + for tx in &mut self.read_tx_by_connection { + if let Some(tx) = tx.take() + && let Some(db) = &self.db + { + let _ = db.release_tx(tx); } } self.execution.active_writer = None; @@ -1132,44 +1390,54 @@ impl TargetEngine for RelationalDbEngine { type Outcome = RelationalDbCommitlogOutcome; type Error = String; - async fn execute_interaction( - &mut self, - interaction: &CommitlogInteraction, - ) -> Result { - self.execute(interaction).await + #[allow(clippy::manual_async_fn)] + fn execute_interaction<'a>( + &'a mut self, + interaction: &'a CommitlogInteraction, + ) -> impl std::future::Future> + 'a { + async move { self.execute(interaction).await } } 
fn finish(&mut self) { Self::finish(self); } - fn collect_outcome(&mut self) -> anyhow::Result { - RelationalDbEngine::collect_outcome(self).map_err(anyhow::Error::msg) + #[allow(clippy::manual_async_fn)] + fn collect_outcome<'a>(&'a mut self) -> impl std::future::Future> + 'a { + async move { + RelationalDbEngine::collect_outcome(self) + .await + .map_err(anyhow::Error::msg) + } } } type StressCommitlogRepo = BuggifiedRepo; type InMemoryCommitlogDurability = Local; +struct RelationalDbBootstrap { + db: RelationalDB, + runtime_handle: tokio::runtime::Handle, + commitlog_repo: StressCommitlogRepo, + durability: Arc, + durability_opts: spacetimedb_durability::local::Options, + runtime_guard: Option, +} + fn bootstrap_relational_db( seed: DstSeed, -) -> anyhow::Result<( - RelationalDB, - tokio::runtime::Handle, - StressCommitlogRepo, - Arc, - spacetimedb_durability::local::Options, - Option, -)> { + fault_profile: CommitlogFaultProfile, +) -> anyhow::Result { let (runtime_handle, runtime_guard) = if let Ok(handle) = tokio::runtime::Handle::try_current() { (handle, None) } else { let runtime = tokio::runtime::Runtime::new()?; (runtime.handle().clone(), Some(runtime)) }; - enable_madsim_buggify(); + let fault_config = CommitlogFaultConfig::for_profile(fault_profile); + configure_madsim_buggify(fault_config.enabled()); - let commitlog_repo = BuggifiedRepo::new(MemoryCommitlogRepo::new(8 * 1024 * 1024)); + let commitlog_repo = BuggifiedRepo::new(MemoryCommitlogRepo::new(8 * 1024 * 1024), fault_config); let durability_opts = commitlog_stress_options(seed.fork(701)); let durability = Arc::new( InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), runtime_handle.clone(), durability_opts) @@ -1193,28 +1461,36 @@ fn bootstrap_relational_db( db.with_auto_commit(Workload::Internal, |tx| { db.set_initialized(tx, Program::empty(HostType::Wasm.into())) })?; - Ok(( + Ok(RelationalDbBootstrap { db, runtime_handle, commitlog_repo, durability, durability_opts, 
runtime_guard, - )) + }) } fn commitlog_stress_options(seed: DstSeed) -> spacetimedb_durability::local::Options { let mut opts = spacetimedb_durability::local::Options::default(); opts.commitlog.max_segment_size = 2 * 1024; opts.commitlog.offset_index_interval_bytes = NonZeroU64::new(256).expect("256 > 0"); - opts.commitlog.offset_index_require_segment_fsync = seed.0 % 2 == 0; + opts.commitlog.offset_index_require_segment_fsync = seed.0.is_multiple_of(2); opts.commitlog.write_buffer_size = 512; opts } -fn enable_madsim_buggify() { +fn configure_madsim_buggify(enabled: bool) { #[cfg(madsim)] - madsim::buggify::enable(); + { + if enabled { + madsim::buggify::enable(); + } else { + madsim::buggify::disable(); + } + } + #[cfg(not(madsim))] + let _ = enabled; } fn runtime_alive_tasks() -> Option { diff --git a/crates/dst/src/targets/standalone_host.rs b/crates/dst/src/targets/standalone_host.rs index a7e06e67bfb..f58f19d6dac 100644 --- a/crates/dst/src/targets/standalone_host.rs +++ b/crates/dst/src/targets/standalone_host.rs @@ -8,9 +8,8 @@ use std::{ use bytes::Bytes; use spacetimedb_client_api::{ - auth::SpacetimeAuth, - routes::subscribe::{generate_random_connection_id, WebSocketOptions}, - ControlStateReadAccess, ControlStateWriteAccess, NodeDelegate, + auth::SpacetimeAuth, routes::subscribe::WebSocketOptions, ControlStateReadAccess, ControlStateWriteAccess, + NodeDelegate, }; use spacetimedb_client_api_messages::websocket::v1 as ws_v1; use spacetimedb_core::{ @@ -21,7 +20,7 @@ use spacetimedb_core::{ messages::control_db::HostType, util::jobs::JobCores, }; -use spacetimedb_lib::Identity; +use spacetimedb_lib::{ConnectionId, Identity}; use spacetimedb_paths::{RootDir, SpacetimePaths}; use spacetimedb_sats::ProductValue; use spacetimedb_schema::{auto_migrate::MigrationPolicy, def::FunctionVisibility}; @@ -30,10 +29,10 @@ use tracing::trace; use crate::{ config::RunConfig, - core::NextInteractionSource, + core::{self, StreamingProperties, TargetEngine}, 
seed::DstSeed, workload::module_ops::{ - HostScenarioId, ModuleInteraction, ModuleReducerSpec, ModuleWorkloadOutcome, NextInteractionGenerator, + HostScenarioId, ModuleInteraction, ModuleReducerSpec, ModuleWorkloadOutcome, ModuleWorkloadSource, }, }; @@ -49,53 +48,19 @@ pub async fn run_generated_with_config_and_scenario( scenario: HostScenarioId, config: RunConfig, ) -> anyhow::Result { - let (outcome, _) = run_once_async(seed, scenario, config).await?; - Ok(outcome) + run_once_async(seed, scenario, config).await } async fn run_once_async( seed: DstSeed, scenario: HostScenarioId, config: RunConfig, -) -> anyhow::Result<(StandaloneHostOutcome, Vec)> { +) -> anyhow::Result { let module = compiled_module()?; let reducers = extract_reducer_specs(module.clone()).await?; - let mut generator = NextInteractionGenerator::new( - seed, - scenario, - reducers.clone(), - config.max_interactions_or_default(usize::MAX), - ); - let mut engine = StandaloneHostEngine::new(seed, module).await?; - let deadline = config.deadline(); - let mut trace_log = Vec::new(); - - loop { - if deadline.is_some_and(|deadline| Instant::now() >= deadline) { - generator.request_finish(); - } - let Some(interaction) = generator.next_interaction() else { - break; - }; - trace!(?interaction, "standalone_host interaction"); - engine - .execute(&interaction) - .await - .map_err(|e| anyhow::anyhow!("interaction failed: {e}"))?; - trace_log.push(interaction); - } - - // Replay contract: same seed/scenario/config must produce same interaction sequence. 
- let mut replay = - NextInteractionGenerator::new(seed, scenario, reducers, config.max_interactions_or_default(usize::MAX)); - let replayed = (0..trace_log.len()) - .filter_map(|_| replay.next_interaction()) - .collect::>(); - if replayed != trace_log { - anyhow::bail!("interaction sequence replay mismatch"); - } - - Ok((engine.finish(), trace_log)) + let generator = ModuleWorkloadSource::new(seed, scenario, reducers, config.max_interactions_or_default(usize::MAX)); + let engine = StandaloneHostEngine::new(seed, module).await?; + core::run_streaming(generator, engine, NoopHostProperties, config).await } #[derive(Clone)] @@ -152,6 +117,8 @@ struct StandaloneHostEngine { root_dir: RootDir, session: Option, module: Arc, + seed: DstSeed, + session_generation: u64, step: usize, reducer_calls: usize, scheduler_waits: usize, @@ -169,13 +136,15 @@ impl StandaloneHostEngine { SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos() ))); let _ = std::fs::remove_dir_all(&root_dir); - let session = open_session(&root_dir, &module, None) + let session = open_session(&root_dir, &module, None, connection_id_for_session(seed, 0)) .await .map_err(anyhow::Error::msg)?; Ok(Self { root_dir, session: Some(session), module, + seed, + session_generation: 1, step: 0, reducer_calls: 0, scheduler_waits: 0, @@ -233,7 +202,10 @@ impl StandaloneHostEngine { .db_identity; let old = self.session.take(); drop(old); - self.session = Some(open_session(&self.root_dir, &self.module, Some(db_identity)).await?); + let connection_id = connection_id_for_session(self.seed, self.session_generation); + self.session_generation = self.session_generation.saturating_add(1); + self.session = + Some(open_session(&self.root_dir, &self.module, Some(db_identity), connection_id).await?); Ok(()) } ModuleInteraction::NoOp => { @@ -243,7 +215,7 @@ impl StandaloneHostEngine { } } - fn finish(self) -> StandaloneHostOutcome { + fn outcome(&self) -> StandaloneHostOutcome { StandaloneHostOutcome { steps_executed: self.step, 
reducer_calls: self.reducer_calls, @@ -255,14 +227,63 @@ impl StandaloneHostEngine { } } +impl TargetEngine for StandaloneHostEngine { + type Observation = (); + type Outcome = StandaloneHostOutcome; + type Error = String; + + #[allow(clippy::manual_async_fn)] + fn execute_interaction<'a>( + &'a mut self, + interaction: &'a ModuleInteraction, + ) -> impl std::future::Future> + 'a { + async move { + trace!(?interaction, "standalone_host interaction"); + self.execute(interaction).await + } + } + + fn finish(&mut self) {} + + #[allow(clippy::manual_async_fn)] + fn collect_outcome<'a>(&'a mut self) -> impl std::future::Future> + 'a { + async move { Ok(self.outcome()) } + } +} + +struct NoopHostProperties; + +impl StreamingProperties for NoopHostProperties { + fn observe( + &mut self, + _engine: &StandaloneHostEngine, + _interaction: &ModuleInteraction, + _observation: &(), + ) -> Result<(), String> { + Ok(()) + } + + fn finish(&mut self, _engine: &StandaloneHostEngine, _outcome: &StandaloneHostOutcome) -> Result<(), String> { + Ok(()) + } +} + fn is_expected_error(_reducer: &str, msg: &str) -> bool { msg.contains("permission denied") } +fn connection_id_for_session(seed: DstSeed, generation: u64) -> ConnectionId { + let high = seed.fork(1_000 + generation.saturating_mul(2)).0 as u128; + let low = seed.fork(1_001 + generation.saturating_mul(2)).0 as u128; + let id = (high << 64) | low; + ConnectionId::from_u128(id.max(1)) +} + async fn open_session( root_dir: &RootDir, module: &CompiledModuleInfo, maybe_db_identity: Option, + connection_id: ConnectionId, ) -> Result { let paths = SpacetimePaths::from_root_dir(root_dir); let certs = CertificateAuthority::in_cli_config_dir(&paths.cli_config_dir); @@ -335,7 +356,7 @@ async fn open_session( .map_err(|e| format!("module watcher failed: {e:#}"))?; let client_id = ClientActorId { identity: caller_identity, - connection_id: generate_random_connection_id(), + connection_id, name: env.client_actor_index().next_client_name(), }; 
let client = ClientConnection::dummy(client_id, ClientConfig::for_test(), replica.id, module_rx); diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs index 098cc980d50..c01d6ff8880 100644 --- a/crates/dst/src/workload/commitlog_ops/generation.rs +++ b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -1,4 +1,4 @@ -//! Composite generator: reuse `table_ops` and interleave lifecycle + chaos ops. +//! Commitlog workload source: table workload plus lifecycle and durability pressure. use std::collections::{BTreeSet, VecDeque}; @@ -9,13 +9,41 @@ use crate::{ workload::strategy::{Index, Percent, Strategy}, workload::{ commitlog_ops::CommitlogInteraction, - table_ops::{strategies::ConnectionChoice, NextInteractionGenerator, TableScenario}, + table_ops::{strategies::ConnectionChoice, TableScenario, TableWorkloadSource}, }, }; -/// Streaming composite interaction source for commitlog-oriented targets. -pub(crate) struct NextInteractionGeneratorComposite { - base: NextInteractionGenerator, +/// Generation profile for commitlog-specific interactions layered around table ops. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) struct CommitlogWorkloadProfile { + pub(crate) chaos_sync_pct: usize, + pub(crate) close_reopen_pct: usize, + pub(crate) create_dynamic_table_pct: usize, + pub(crate) migrate_after_create_pct: usize, + pub(crate) migrate_dynamic_table_pct: usize, + pub(crate) drop_dynamic_table_pct: usize, +} + +impl Default for CommitlogWorkloadProfile { + fn default() -> Self { + Self { + chaos_sync_pct: 18, + close_reopen_pct: 1, + create_dynamic_table_pct: 1, + migrate_after_create_pct: 55, + migrate_dynamic_table_pct: 6, + drop_dynamic_table_pct: 5, + } + } +} + +/// Streaming source for commitlog-oriented targets. +/// +/// This composes a base table workload with commitlog lifecycle interactions +/// instead of defining an unrelated workload language. 
+pub(crate) struct CommitlogWorkloadSource { + base: TableWorkloadSource, + profile: CommitlogWorkloadProfile, rng: DstRng, num_connections: usize, next_slot: u32, @@ -23,16 +51,35 @@ pub(crate) struct NextInteractionGeneratorComposite { pending: VecDeque, } -impl NextInteractionGeneratorComposite { +impl CommitlogWorkloadSource { pub fn new( seed: DstSeed, scenario: S, schema: SchemaPlan, num_connections: usize, target_interactions: usize, + ) -> Self { + Self::with_profile( + seed, + scenario, + schema, + num_connections, + target_interactions, + CommitlogWorkloadProfile::default(), + ) + } + + pub fn with_profile( + seed: DstSeed, + scenario: S, + schema: SchemaPlan, + num_connections: usize, + target_interactions: usize, + profile: CommitlogWorkloadProfile, ) -> Self { Self { - base: NextInteractionGenerator::new(seed.fork(123), scenario, schema, num_connections, target_interactions), + base: TableWorkloadSource::new(seed.fork(123), scenario, schema, num_connections, target_interactions), + profile, rng: seed.fork(124).rng(), num_connections, next_slot: 0, @@ -51,14 +98,18 @@ impl NextInteractionGeneratorComposite { }; self.pending.push_back(CommitlogInteraction::Table(base_op)); - if Percent::new(18).sample(&mut self.rng) { + if self.base.has_open_read_tx() { + return true; + } + + if Percent::new(self.profile.chaos_sync_pct).sample(&mut self.rng) { self.pending.push_back(CommitlogInteraction::ChaosSync); } - if Percent::new(1).sample(&mut self.rng) { + if Percent::new(self.profile.close_reopen_pct).sample(&mut self.rng) { self.pending.push_back(CommitlogInteraction::CloseReopen); } - if Percent::new(1).sample(&mut self.rng) { + if Percent::new(self.profile.create_dynamic_table_pct).sample(&mut self.rng) { let conn = ConnectionChoice { connection_count: self.num_connections, } @@ -70,14 +121,14 @@ impl NextInteractionGeneratorComposite { .push_back(CommitlogInteraction::CreateDynamicTable { conn, slot }); // Frequently follow a create with migration to stress 
add-column + // copy + subsequent auto-inc allocation paths. - if Percent::new(55).sample(&mut self.rng) { + if Percent::new(self.profile.migrate_after_create_pct).sample(&mut self.rng) { self.pending .push_back(CommitlogInteraction::MigrateDynamicTable { conn, slot }); } return true; } - if !self.alive_slots.is_empty() && Percent::new(6).sample(&mut self.rng) { + if !self.alive_slots.is_empty() && Percent::new(self.profile.migrate_dynamic_table_pct).sample(&mut self.rng) { let conn = ConnectionChoice { connection_count: self.num_connections, } @@ -92,7 +143,7 @@ impl NextInteractionGeneratorComposite { .push_back(CommitlogInteraction::MigrateDynamicTable { conn, slot }); } - if !self.alive_slots.is_empty() && Percent::new(5).sample(&mut self.rng) { + if !self.alive_slots.is_empty() && Percent::new(self.profile.drop_dynamic_table_pct).sample(&mut self.rng) { let conn = ConnectionChoice { connection_count: self.num_connections, } @@ -112,7 +163,7 @@ impl NextInteractionGeneratorComposite { } } -impl NextInteractionGeneratorComposite { +impl CommitlogWorkloadSource { pub fn pull_next_interaction(&mut self) -> Option { loop { if let Some(next) = self.pending.pop_front() { @@ -125,7 +176,7 @@ impl NextInteractionGeneratorComposite { } } -impl NextInteractionSource for NextInteractionGeneratorComposite { +impl NextInteractionSource for CommitlogWorkloadSource { type Interaction = CommitlogInteraction; fn next_interaction(&mut self) -> Option { @@ -137,7 +188,7 @@ impl NextInteractionSource for NextInteractionGeneratorComposi } } -impl Iterator for NextInteractionGeneratorComposite { +impl Iterator for CommitlogWorkloadSource { type Item = CommitlogInteraction; fn next(&mut self) -> Option { diff --git a/crates/dst/src/workload/commitlog_ops/mod.rs b/crates/dst/src/workload/commitlog_ops/mod.rs index 2ce68e4ae81..e08647e7a6f 100644 --- a/crates/dst/src/workload/commitlog_ops/mod.rs +++ b/crates/dst/src/workload/commitlog_ops/mod.rs @@ -3,8 +3,8 @@ mod generation; mod 
types; -pub(crate) use generation::NextInteractionGeneratorComposite; +pub(crate) use generation::CommitlogWorkloadSource; pub use types::{ - CommitlogInteraction, CommitlogWorkloadOutcome, InteractionSummary, RuntimeSummary, SchemaSummary, - TableOperationSummary, TransactionSummary, + CommitlogInteraction, CommitlogWorkloadOutcome, DiskFaultSummary, DurableReplaySummary, InteractionSummary, + RuntimeSummary, SchemaSummary, TableOperationSummary, TransactionSummary, }; diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs index 2b067273720..6cb00c11c44 100644 --- a/crates/dst/src/workload/commitlog_ops/types.rs +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -1,6 +1,10 @@ //! Serializable interaction model for relational-db + commitlog DST. -use crate::workload::table_ops::{TableWorkloadInteraction, TableWorkloadOutcome}; +use crate::{ + config::CommitlogFaultProfile, + schema::SimRow, + workload::table_ops::{TableWorkloadInteraction, TableWorkloadOutcome}, +}; /// One interaction in the commitlog-oriented mixed workload. #[derive(Clone, Debug, Eq, PartialEq)] @@ -30,9 +34,19 @@ pub struct CommitlogWorkloadOutcome { pub table_ops: TableOperationSummary, pub transactions: TransactionSummary, pub runtime: RuntimeSummary, + pub disk_faults: DiskFaultSummary, + pub replay: DurableReplaySummary, pub table: TableWorkloadOutcome, } +/// State observed after opening a fresh database from durable commitlog history. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DurableReplaySummary { + pub durable_offset: Option, + pub base_rows: Vec>, + pub dynamic_table_count: usize, +} + #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct SchemaSummary { pub initial_tables: usize, @@ -57,19 +71,47 @@ pub struct InteractionSummary { #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct TableOperationSummary { + /// Explicit write transaction starts. 
pub begin_tx: usize, + /// Explicit write transaction commits. pub commit_tx: usize, + /// Explicit write transaction rollbacks. pub rollback_tx: usize, + /// Long read snapshot starts. + pub begin_read_tx: usize, + /// Long read snapshot releases. + pub release_read_tx: usize, + /// Expected failures when a second writer tries to begin. + pub begin_tx_conflict: usize, + /// Expected failures when a second writer tries to write. + pub write_conflict_insert: usize, + /// Fresh single-row inserts. pub insert: usize, + /// Single-row deletes. pub delete: usize, - pub duplicate_insert: usize, + /// Exact full-row reinserts that should be idempotent no-ops. + pub exact_duplicate_insert: usize, + /// Same primary id with different payload; should violate the unique key. + pub unique_key_conflict_insert: usize, + /// Deletes of absent rows that should report no mutation. pub delete_missing: usize, + /// Multi-row inserts. pub batch_insert: usize, + /// Multi-row deletes. pub batch_delete: usize, + /// Delete followed by inserting the same row. pub reinsert: usize, + /// Add-column schema changes against live base tables. + pub add_column: usize, + /// Add-index schema changes against live base tables. + pub add_index: usize, + /// Primary-id lookup oracle checks. pub point_lookup: usize, + /// Column equality count oracle checks. pub predicate_count: usize, + /// Indexed range scan oracle checks. pub range_scan: usize, + /// Full scan oracle checks. 
pub full_scan: usize, } @@ -89,3 +131,17 @@ pub struct RuntimeSummary { pub durability_actors_started: usize, pub runtime_alive_tasks: Option, } + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DiskFaultSummary { + pub profile: CommitlogFaultProfile, + pub latency: usize, + pub short_read: usize, + pub short_write: usize, + pub read_error: usize, + pub write_error: usize, + pub flush_error: usize, + pub fsync_error: usize, + pub open_error: usize, + pub metadata_error: usize, +} diff --git a/crates/dst/src/workload/module_ops/generation.rs b/crates/dst/src/workload/module_ops/generation.rs index e3049045ac5..3467ab60503 100644 --- a/crates/dst/src/workload/module_ops/generation.rs +++ b/crates/dst/src/workload/module_ops/generation.rs @@ -16,8 +16,8 @@ enum ActionKind { Reopen, } -/// Deterministic stream generator for standalone-host interactions. -pub(crate) struct NextInteractionGenerator { +/// Deterministic source for standalone-host interactions. +pub(crate) struct ModuleWorkloadSource { scenario: HostScenarioId, reducers: Vec, rng: DstRng, @@ -25,7 +25,7 @@ pub(crate) struct NextInteractionGenerator { emitted: usize, } -impl NextInteractionGenerator { +impl ModuleWorkloadSource { pub fn new( seed: DstSeed, scenario: HostScenarioId, @@ -109,7 +109,7 @@ fn supports_generation(ty: &spacetimedb_sats::AlgebraicType) -> bool { ) } -impl NextInteractionSource for NextInteractionGenerator { +impl NextInteractionSource for ModuleWorkloadSource { type Interaction = ModuleInteraction; fn next_interaction(&mut self) -> Option { diff --git a/crates/dst/src/workload/module_ops/mod.rs b/crates/dst/src/workload/module_ops/mod.rs index 91d943d562e..a2e20cd4d12 100644 --- a/crates/dst/src/workload/module_ops/mod.rs +++ b/crates/dst/src/workload/module_ops/mod.rs @@ -3,5 +3,5 @@ mod generation; mod types; -pub(crate) use generation::NextInteractionGenerator; +pub(crate) use generation::ModuleWorkloadSource; pub use types::{HostScenarioId, ModuleInteraction, 
ModuleReducerSpec, ModuleWorkloadOutcome}; diff --git a/crates/dst/src/workload/strategy.rs b/crates/dst/src/workload/strategy.rs index 191f98dce75..f3acbd2d19d 100644 --- a/crates/dst/src/workload/strategy.rs +++ b/crates/dst/src/workload/strategy.rs @@ -69,9 +69,8 @@ pub(crate) struct Percent { impl Percent { pub(crate) fn new(percent: usize) -> Self { - Self { - percent: percent.min(100), - } + assert!(percent <= 100, "percent must be in 0..=100, got {percent}"); + Self { percent } } } @@ -144,4 +143,10 @@ mod tests { assert!(idx < 5); } } + + #[test] + #[should_panic(expected = "percent must be in 0..=100")] + fn percent_rejects_out_of_range_values() { + let _ = Percent::new(101); + } } diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index d3ebb5badd9..085279b0871 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -2,7 +2,7 @@ use std::collections::VecDeque; use crate::{ core::NextInteractionSource, - schema::{SchemaPlan, TablePlan}, + schema::{ColumnPlan, SchemaPlan, TablePlan}, seed::{DstRng, DstSeed}, workload::strategy::{Index, Percent, Strategy}, }; @@ -19,7 +19,7 @@ use super::{ /// duration runs do not need to materialize the full interaction list in /// memory up front. #[derive(Clone, Debug)] -pub struct NextInteractionGenerator { +pub struct TableWorkloadSource { // Deterministic source for all planner choices. rng: DstRng, // Scenario-specific workload policy layered on top of the shared model. 
@@ -65,6 +65,42 @@ impl<'a> ScenarioPlanner<'a> { Percent::new(percent).sample(self.rng) } + pub fn connection_count(&self) -> usize { + self.model.connections.len() + } + + pub fn active_writer(&self) -> Option { + self.model.active_writer() + } + + pub fn has_read_tx(&self, conn: usize) -> bool { + self.model.has_read_tx(conn) + } + + pub fn any_read_tx(&self) -> bool { + self.model.any_read_tx() + } + + pub fn begin_read_tx(&mut self, conn: usize) { + self.model.begin_read_tx(conn); + } + + pub fn release_read_tx(&mut self, conn: usize) { + self.model.release_read_tx(conn); + } + + pub fn begin_tx(&mut self, conn: usize) { + self.model.begin_tx(conn); + } + + pub fn commit_tx(&mut self, conn: usize) { + self.model.commit(conn); + } + + pub fn rollback_tx(&mut self, conn: usize) { + self.model.rollback(conn); + } + /// Tries to emit one transaction control interaction for `conn`. /// /// The shared generator owns transaction lifecycle so scenario code can @@ -77,7 +113,11 @@ impl<'a> ScenarioPlanner<'a> { }) .sample(self.rng) { - TxControlAction::Begin if !self.model.connections[conn].in_tx && self.model.active_writer().is_none() => { + TxControlAction::Begin + if !self.model.connections[conn].in_tx + && !self.model.has_read_tx(conn) + && self.model.active_writer().is_none() => + { self.model.begin_tx(conn); self.pending.push_back(TableWorkloadInteraction::begin_tx(conn)); true @@ -129,16 +169,32 @@ impl<'a> ScenarioPlanner<'a> { self.model.insert(conn, table, row); } + pub fn add_column(&mut self, table: usize, column: ColumnPlan, default: spacetimedb_sats::AlgebraicValue) { + self.model.add_column(table, column, default); + } + + pub fn add_index(&mut self, table: usize, cols: Vec) { + self.model.add_index(table, cols); + } + pub fn absent_row(&mut self, conn: usize, table: usize) -> crate::schema::SimRow { self.model.absent_row(self.rng, conn, table) } + pub fn unique_key_conflict_row( + &mut self, + table: usize, + source: &crate::schema::SimRow, + ) -> 
Option { + self.model.unique_key_conflict_row(self.rng, table, source) + } + pub fn push_interaction(&mut self, interaction: TableWorkloadInteraction) { self.pending.push_back(interaction); } } -impl NextInteractionGenerator { +impl TableWorkloadSource { pub fn new( seed: DstSeed, scenario: S, @@ -163,6 +219,10 @@ impl NextInteractionGenerator { self.target_interactions = self.emitted; } + pub fn has_open_read_tx(&self) -> bool { + self.model.any_read_tx() + } + fn fill_pending(&mut self) { if self.emitted >= self.target_interactions { // Once the workload budget is spent, stop asking the scenario for @@ -175,6 +235,11 @@ impl NextInteractionGenerator { self.pending.push_back(TableWorkloadInteraction::commit_tx(conn)); return; } + if self.model.has_read_tx(conn) { + self.model.release_read_tx(conn); + self.pending.push_back(TableWorkloadInteraction::release_read_tx(conn)); + return; + } } self.finished = true; return; @@ -183,12 +248,20 @@ impl NextInteractionGenerator { // Locking targets allow only one writer at a time. If a writer is // already open, keep driving that same connection until it commits or // rolls back. Otherwise pick a fresh connection uniformly. - let conn = self.model.active_writer().unwrap_or_else(|| { + let conn = if let Some(active_writer) = self.model.active_writer() { + active_writer + } else if let Some(read_conn) = (0..self.num_connections).find(|&conn| self.model.has_read_tx(conn)) { + // The current RelationalDB target can block when a write transaction + // starts behind an open read transaction. Keep driving the snapshot + // holder until it releases; interleaved read/write snapshots should + // be reintroduced once the target models that lock behavior. 
+ read_conn + } else { ConnectionChoice { connection_count: self.num_connections, } .sample(&mut self.rng) - }); + }; let mut planner = ScenarioPlanner { rng: &mut self.rng, model: &mut self.model, @@ -198,7 +271,7 @@ impl NextInteractionGenerator { } } -impl NextInteractionGenerator { +impl TableWorkloadSource { pub fn pull_next_interaction(&mut self) -> Option { loop { // Scenario planning fills `pending` in bursts, but the iterator @@ -217,7 +290,7 @@ impl NextInteractionGenerator { } } -impl NextInteractionSource for NextInteractionGenerator { +impl NextInteractionSource for TableWorkloadSource { type Interaction = TableWorkloadInteraction; fn next_interaction(&mut self) -> Option { @@ -229,7 +302,7 @@ impl NextInteractionSource for NextInteractionGenerator { } } -impl Iterator for NextInteractionGenerator { +impl Iterator for TableWorkloadSource { type Item = TableWorkloadInteraction; fn next(&mut self) -> Option { diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs index 21ff91c0743..18dbd2e3a14 100644 --- a/crates/dst/src/workload/table_ops/mod.rs +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -6,7 +6,7 @@ mod scenarios; pub(crate) mod strategies; mod types; -pub(crate) use generation::NextInteractionGenerator; +pub(crate) use generation::TableWorkloadSource; pub(crate) use model::ExpectedModel; pub use scenarios::TableScenarioId; pub(crate) use types::{ConnectionWriteState, TableScenario}; diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs index 885a8673f2b..4abfefcf24e 100644 --- a/crates/dst/src/workload/table_ops/model.rs +++ b/crates/dst/src/workload/table_ops/model.rs @@ -3,7 +3,7 @@ use std::ops::Bound; use spacetimedb_sats::AlgebraicValue; use crate::{ - schema::{generate_value_for_type, SchemaPlan, SimRow}, + schema::{distinct_value_for_type, generate_value_for_type, ColumnPlan, SchemaPlan, SimRow}, seed::{DstRng, DstSeed}, }; @@ -26,6 +26,7 @@ 
pub(crate) struct GenerationModel { #[derive(Clone, Debug, Default)] pub(crate) struct PendingConnection { pub(crate) in_tx: bool, + read_snapshot: Option>>, staged_inserts: Vec<(usize, SimRow)>, staged_deletes: Vec<(usize, SimRow)>, } @@ -55,6 +56,9 @@ impl GenerationModel { } pub(crate) fn visible_rows(&self, conn: usize, table: usize) -> Vec { + if let Some(snapshot) = &self.connections[conn].read_snapshot { + return snapshot[table].clone(); + } let mut rows = self.committed[table].clone(); let pending = &self.connections[conn]; for (pending_table, row) in &pending.staged_deletes { @@ -78,14 +82,58 @@ impl GenerationModel { row } + pub(crate) fn unique_key_conflict_row(&self, rng: &mut DstRng, table: usize, source: &SimRow) -> Option { + let table_plan = &self.schema.tables[table]; + let value_count = source.values.len().min(table_plan.columns.len()); + if value_count <= 1 { + return None; + } + + let col_idx = 1 + rng.index(value_count - 1); + let mut row = source.clone(); + row.values[col_idx] = distinct_value_for_type(&table_plan.columns[col_idx].ty, &row.values[col_idx]); + Some(row) + } + pub(crate) fn active_writer(&self) -> Option { self.active_writer } + pub(crate) fn has_read_tx(&self, conn: usize) -> bool { + self.connections[conn].read_snapshot.is_some() + } + + pub(crate) fn any_read_tx(&self) -> bool { + self.connections + .iter() + .any(|connection| connection.read_snapshot.is_some()) + } + + pub(crate) fn begin_read_tx(&mut self, conn: usize) { + let pending = &mut self.connections[conn]; + assert!(!pending.in_tx, "connection already has write transaction"); + assert!( + pending.read_snapshot.is_none(), + "connection already has read transaction" + ); + pending.read_snapshot = Some(self.committed.clone()); + } + + pub(crate) fn release_read_tx(&mut self, conn: usize) { + assert!( + self.connections[conn].read_snapshot.take().is_some(), + "connection has no read transaction" + ); + } + pub(crate) fn begin_tx(&mut self, conn: usize) { 
assert!(self.active_writer.is_none(), "single writer already active"); let pending = &mut self.connections[conn]; assert!(!pending.in_tx, "connection already in transaction"); + assert!( + pending.read_snapshot.is_none(), + "connection already has read transaction" + ); pending.in_tx = true; self.active_writer = Some(conn); } @@ -145,6 +193,36 @@ impl GenerationModel { pending.in_tx = false; self.active_writer = None; } + + pub(crate) fn add_column(&mut self, table: usize, column: ColumnPlan, default: AlgebraicValue) { + self.schema.tables[table].columns.push(column); + for row in &mut self.committed[table] { + row.values.push(default.clone()); + } + for connection in &mut self.connections { + for (pending_table, row) in connection + .staged_inserts + .iter_mut() + .chain(connection.staged_deletes.iter_mut()) + { + if *pending_table == table { + row.values.push(default.clone()); + } + } + if let Some(snapshot) = &mut connection.read_snapshot { + for row in &mut snapshot[table] { + row.values.push(default.clone()); + } + } + } + } + + pub(crate) fn add_index(&mut self, table: usize, cols: Vec) { + let indexes = &mut self.schema.tables[table].extra_indexes; + if !indexes.contains(&cols) { + indexes.push(cols); + } + } } /// Replay model for the expected final committed state of a table workload. 
@@ -162,6 +240,7 @@ pub struct ExpectedModel { #[derive(Clone, Debug, Default)] struct ExpectedConnection { in_tx: bool, + read_snapshot: Option>>, staged_inserts: Vec<(usize, SimRow)>, staged_deletes: Vec<(usize, SimRow)>, } @@ -188,6 +267,18 @@ impl ExpectedModel { self.connections[*conn].in_tx = true; self.active_writer = Some(*conn); } + TableOperation::BeginReadTx { conn } => { + let state = &mut self.connections[*conn]; + assert!(!state.in_tx, "read tx started while write tx is open"); + assert!(state.read_snapshot.is_none(), "nested read tx in expected model"); + state.read_snapshot = Some(self.committed.clone()); + } + TableOperation::ReleaseReadTx { conn } => { + assert!( + self.connections[*conn].read_snapshot.take().is_some(), + "release read tx without open read tx" + ); + } TableOperation::CommitTx { conn } => { assert_eq!(self.active_writer, Some(*conn), "commit by non-owner in expected model"); let state = &mut self.connections[*conn]; @@ -232,8 +323,20 @@ impl ExpectedModel { self.delete(*conn, *table, row.clone()); self.insert(*conn, *table, row.clone()); } - TableOperation::DuplicateInsert { .. } + TableOperation::AddColumn { + table, + column: _, + default, + .. + } => { + self.add_column(*table, default.clone()); + } + TableOperation::AddIndex { .. } => {} + TableOperation::ExactDuplicateInsert { .. } + | TableOperation::UniqueKeyConflictInsert { .. } | TableOperation::DeleteMissing { .. } + | TableOperation::BeginTxConflict { .. } + | TableOperation::WriteConflictInsert { .. } | TableOperation::PointLookup { .. } | TableOperation::PredicateCount { .. } | TableOperation::RangeScan { .. 
} @@ -242,6 +345,9 @@ impl ExpectedModel { } pub fn visible_rows(&self, conn: usize, table: usize) -> Vec { + if let Some(snapshot) = &self.connections[conn].read_snapshot { + return snapshot[table].clone(); + } let mut rows = self.committed[table].clone(); let pending = &self.connections[conn]; for (pending_table, row) in &pending.staged_deletes { @@ -322,6 +428,28 @@ impl ExpectedModel { self.committed[table].retain(|candidate| *candidate != row); } } + + fn add_column(&mut self, table: usize, default: AlgebraicValue) { + for row in &mut self.committed[table] { + row.values.push(default.clone()); + } + for connection in &mut self.connections { + for (pending_table, row) in connection + .staged_inserts + .iter_mut() + .chain(connection.staged_deletes.iter_mut()) + { + if *pending_table == table { + row.values.push(default.clone()); + } + } + if let Some(snapshot) = &mut connection.read_snapshot { + for row in &mut snapshot[table] { + row.values.push(default.clone()); + } + } + } + } } fn bound_contains_lower(bound: &Bound, key: &AlgebraicValue) -> bool { diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs index afbd20e2e11..c801a49f00b 100644 --- a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -3,14 +3,15 @@ use std::ops::Bound; use spacetimedb_sats::AlgebraicType; use crate::{ - schema::{generate_supported_type, ColumnPlan, SchemaPlan, TablePlan}, + schema::{default_value_for_type, generate_supported_type, ColumnPlan, SchemaPlan, SimRow, TablePlan}, seed::DstRng, + workload::strategy::{Index, Percent, Strategy}, }; use super::super::{generation::ScenarioPlanner, TableWorkloadInteraction, TableWorkloadOutcome}; #[derive(Clone, Copy)] -struct ScenarioTuning { +struct TableWorkloadProfile { min_tables: usize, table_count_choices: usize, min_extra_cols: usize, @@ -25,9 +26,17 @@ struct ScenarioTuning { 
begin_tx_pct: usize, commit_tx_pct: usize, rollback_tx_pct: usize, + begin_read_tx_pct: usize, + release_read_tx_pct: usize, + empty_tx_pct: usize, + write_conflict_pct: usize, + exact_duplicate_insert_pct: usize, + unique_key_conflict_insert_pct: usize, + add_column_pct: usize, + add_index_pct: usize, } -const RANDOM_CRUD_TUNING: ScenarioTuning = ScenarioTuning { +const RANDOM_CRUD_PROFILE: TableWorkloadProfile = TableWorkloadProfile { min_tables: 2, table_count_choices: 3, min_extra_cols: 1, @@ -42,9 +51,17 @@ const RANDOM_CRUD_TUNING: ScenarioTuning = ScenarioTuning { begin_tx_pct: 20, commit_tx_pct: 15, rollback_tx_pct: 10, + begin_read_tx_pct: 4, + release_read_tx_pct: 35, + empty_tx_pct: 2, + write_conflict_pct: 8, + exact_duplicate_insert_pct: 4, + unique_key_conflict_insert_pct: 4, + add_column_pct: 1, + add_index_pct: 2, }; -const INDEXED_RANGES_TUNING: ScenarioTuning = ScenarioTuning { +const INDEXED_RANGES_PROFILE: TableWorkloadProfile = TableWorkloadProfile { min_tables: 2, table_count_choices: 2, min_extra_cols: 3, @@ -59,29 +76,39 @@ const INDEXED_RANGES_TUNING: ScenarioTuning = ScenarioTuning { begin_tx_pct: 20, commit_tx_pct: 15, rollback_tx_pct: 8, + begin_read_tx_pct: 6, + release_read_tx_pct: 30, + empty_tx_pct: 2, + write_conflict_pct: 10, + exact_duplicate_insert_pct: 3, + unique_key_conflict_insert_pct: 4, + add_column_pct: 2, + add_index_pct: 4, }; pub fn generate_schema(rng: &mut DstRng) -> SchemaPlan { - generate_schema_with_tuning(rng, RANDOM_CRUD_TUNING) + generate_schema_with_profile(rng, RANDOM_CRUD_PROFILE) } pub fn generate_indexed_ranges_schema(rng: &mut DstRng) -> SchemaPlan { - generate_schema_with_tuning(rng, INDEXED_RANGES_TUNING) + generate_schema_with_profile(rng, INDEXED_RANGES_PROFILE) } -fn generate_schema_with_tuning(rng: &mut DstRng, tuning: ScenarioTuning) -> SchemaPlan { - let table_count = tuning.min_tables + mixed_index(rng, tuning.table_count_choices); +fn generate_schema_with_profile(rng: &mut DstRng, profile: 
TableWorkloadProfile) -> SchemaPlan { + let table_count = profile.min_tables + Index::new(profile.table_count_choices).sample(rng); let mut tables = Vec::with_capacity(table_count); for table_idx in 0..table_count { - let extra_cols = tuning.min_extra_cols + rng.index(tuning.extra_col_choices); + let extra_cols = profile.min_extra_cols + Index::new(profile.extra_col_choices).sample(rng); let mut columns = vec![ColumnPlan { name: "id".into(), ty: AlgebraicType::U64, }]; for col_idx in 0..extra_cols { - let ty = if col_idx < tuning.preferred_range_cols && rng.index(100) < tuning.prefer_range_compatible_pct { - if rng.index(100) < tuning.prefer_u64_pct { + let ty = if col_idx < profile.preferred_range_cols + && Percent::new(profile.prefer_range_compatible_pct).sample(rng) + { + if Percent::new(profile.prefer_u64_pct).sample(rng) { AlgebraicType::U64 } else { AlgebraicType::Bool @@ -103,14 +130,14 @@ fn generate_schema_with_tuning(rng: &mut DstRng, tuning: ScenarioTuning) -> Sche .map(|(idx, _)| idx as u16) .collect::>(); if let Some(&col) = non_primary_range_cols.first() - && rng.index(100) < tuning.single_index_pct + && Percent::new(profile.single_index_pct).sample(rng) { extra_indexes.push(vec![col]); } - if non_primary_range_cols.len() >= 2 && rng.index(100) < tuning.composite2_index_pct { + if non_primary_range_cols.len() >= 2 && Percent::new(profile.composite2_index_pct).sample(rng) { extra_indexes.push(non_primary_range_cols[..2].to_vec()); } - if non_primary_range_cols.len() >= 3 && rng.index(100) < tuning.composite3_index_pct { + if non_primary_range_cols.len() >= 3 && Percent::new(profile.composite3_index_pct).sample(rng) { extra_indexes.push(non_primary_range_cols[..3].to_vec()); } extra_indexes.sort(); @@ -125,31 +152,92 @@ fn generate_schema_with_tuning(rng: &mut DstRng, tuning: ScenarioTuning) -> Sche SchemaPlan { tables } } -fn mixed_index(rng: &mut DstRng, len: usize) -> usize { - assert!(len > 0, "len must be non-zero"); - let value = rng.next_u64(); - 
((value ^ (value >> 32)) as usize) % len -} - pub fn validate_outcome(_schema: &SchemaPlan, _outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { Ok(()) } pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: usize) { - fill_pending_with_tuning(planner, conn, RANDOM_CRUD_TUNING); + fill_pending_with_profile(planner, conn, RANDOM_CRUD_PROFILE); } pub fn fill_pending_indexed_ranges(planner: &mut ScenarioPlanner<'_>, conn: usize) { - fill_pending_with_tuning(planner, conn, INDEXED_RANGES_TUNING); + fill_pending_with_profile(planner, conn, INDEXED_RANGES_PROFILE); } -fn fill_pending_with_tuning(planner: &mut ScenarioPlanner<'_>, conn: usize, tuning: ScenarioTuning) { - if planner.maybe_control_tx(conn, tuning.begin_tx_pct, tuning.commit_tx_pct, tuning.rollback_tx_pct) { +fn fill_pending_with_profile(planner: &mut ScenarioPlanner<'_>, conn: usize, profile: TableWorkloadProfile) { + if planner.has_read_tx(conn) { + let table = planner.choose_table(); + let visible_rows = planner.visible_rows(conn, table); + if planner.roll_percent(profile.release_read_tx_pct) { + planner.release_read_tx(conn); + planner.push_interaction(TableWorkloadInteraction::release_read_tx(conn)); + } else if !emit_query(planner, conn, table, &visible_rows) { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + } + return; + } + + if let Some(owner) = planner.active_writer() + && planner.roll_percent(profile.write_conflict_pct) + && emit_write_conflict(planner, owner) + { + return; + } + + if planner.active_writer().is_none() { + if planner.roll_percent(profile.empty_tx_pct) { + let rollback = planner.roll_percent(50); + planner.begin_tx(conn); + planner.push_interaction(TableWorkloadInteraction::begin_tx(conn)); + if rollback { + planner.rollback_tx(conn); + planner.push_interaction(TableWorkloadInteraction::rollback_tx(conn)); + } else { + planner.commit_tx(conn); + planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); + } + return; + } + + if 
planner.roll_percent(profile.begin_read_tx_pct) { + planner.begin_read_tx(conn); + planner.push_interaction(TableWorkloadInteraction::begin_read_tx(conn)); + let table = planner.choose_table(); + let visible_rows = planner.visible_rows(conn, table); + if !emit_query(planner, conn, table, &visible_rows) { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + } + return; + } + } + + if planner.maybe_control_tx( + conn, + profile.begin_tx_pct, + profile.commit_tx_pct, + profile.rollback_tx_pct, + ) { return; } let table = planner.choose_table(); let visible_rows = planner.visible_rows(conn, table); + if planner.active_writer().is_none() + && !planner.any_read_tx() + && !visible_rows.is_empty() + && planner.roll_percent(profile.add_column_pct) + && emit_add_column(planner, conn, table) + { + return; + } + if planner.active_writer().is_none() + && !planner.any_read_tx() + && visible_rows.len() >= 2 + && planner.roll_percent(profile.add_index_pct) + && emit_add_index(planner, conn, table, &visible_rows) + { + return; + } if emit_query(planner, conn, table, &visible_rows) { return; } @@ -158,7 +246,7 @@ fn fill_pending_with_tuning(planner: &mut ScenarioPlanner<'_>, conn: usize, tuni planner.push_interaction(TableWorkloadInteraction::delete_missing(conn, table, row)); return; } - let choose_insert = visible_rows.is_empty() || planner.roll_percent(tuning.insert_pct); + let choose_insert = visible_rows.is_empty() || planner.roll_percent(profile.insert_pct); if choose_insert { if planner.roll_percent(10) { let count = 2 + planner.choose_index(3); @@ -173,6 +261,17 @@ fn fill_pending_with_tuning(planner: &mut ScenarioPlanner<'_>, conn: usize, tuni return; } + if planner.roll_percent(profile.exact_duplicate_insert_pct) { + let row = visible_rows[planner.choose_index(visible_rows.len())].clone(); + planner.push_interaction(TableWorkloadInteraction::exact_duplicate_insert(conn, table, row)); + return; + } + if 
planner.roll_percent(profile.unique_key_conflict_insert_pct) + && emit_unique_key_conflict_insert(planner, conn, table, &visible_rows) + { + return; + } + if visible_rows.len() >= 2 && planner.roll_percent(10) { let count = 2 + planner.choose_index(visible_rows.len().min(3) - 1); let mut candidates = visible_rows.clone(); @@ -197,6 +296,128 @@ fn fill_pending_with_tuning(planner: &mut ScenarioPlanner<'_>, conn: usize, tuni planner.push_interaction(TableWorkloadInteraction::delete(conn, table, row)); } +fn emit_write_conflict(planner: &mut ScenarioPlanner<'_>, owner: usize) -> bool { + let candidates = (0..planner.connection_count()) + .filter(|&conn| conn != owner && !planner.has_read_tx(conn)) + .collect::>(); + if candidates.is_empty() { + return false; + } + let conn = candidates[planner.choose_index(candidates.len())]; + if planner.roll_percent(50) { + planner.push_interaction(TableWorkloadInteraction::begin_tx_conflict(owner, conn)); + return true; + } + + let table = planner.choose_table(); + let row = planner.make_row(table); + planner.push_interaction(TableWorkloadInteraction::write_conflict_insert(owner, conn, table, row)); + true +} + +fn emit_add_column(planner: &mut ScenarioPlanner<'_>, conn: usize, table: usize) -> bool { + const MAX_COLUMNS_PER_TABLE: usize = 12; + let column_idx = planner.table_plan(table).columns.len(); + if column_idx >= MAX_COLUMNS_PER_TABLE { + return false; + } + let ty = match planner.choose_index(4) { + 0 => AlgebraicType::Bool, + 1 => AlgebraicType::U64, + 2 => AlgebraicType::String, + _ => generate_supported_type_for_churn(planner), + }; + let column = ColumnPlan { + name: format!("dst_added_{table}_{column_idx}"), + ty, + }; + let default = default_value_for_type(&column.ty); + planner.add_column(table, column.clone(), default.clone()); + planner.push_interaction(TableWorkloadInteraction::add_column(conn, table, column, default)); + true +} + +fn emit_add_index(planner: &mut ScenarioPlanner<'_>, conn: usize, table: usize, 
visible_rows: &[SimRow]) -> bool { + let candidates = candidate_new_indexes(planner, table); + if candidates.is_empty() { + return false; + } + let cols = candidates[planner.choose_index(candidates.len())].clone(); + planner.add_index(table, cols.clone()); + planner.push_interaction(TableWorkloadInteraction::add_index(conn, table, cols.clone())); + if let Some((lower, upper)) = inclusive_bounds_for_rows(visible_rows, &cols) { + planner.push_interaction(TableWorkloadInteraction::range_scan( + conn, + table, + cols, + Bound::Included(lower), + Bound::Included(upper), + )); + } + true +} + +fn emit_unique_key_conflict_insert( + planner: &mut ScenarioPlanner<'_>, + conn: usize, + table: usize, + visible_rows: &[SimRow], +) -> bool { + let source = visible_rows[planner.choose_index(visible_rows.len())].clone(); + let Some(row) = planner.unique_key_conflict_row(table, &source) else { + return false; + }; + planner.push_interaction(TableWorkloadInteraction::unique_key_conflict_insert(conn, table, row)); + true +} + +fn generate_supported_type_for_churn(planner: &mut ScenarioPlanner<'_>) -> AlgebraicType { + match planner.choose_index(6) { + 0 => AlgebraicType::I64, + 1 => AlgebraicType::U32, + 2 => AlgebraicType::I32, + 3 => AlgebraicType::U8, + 4 => AlgebraicType::I128, + _ => AlgebraicType::U128, + } +} + +fn candidate_new_indexes(planner: &ScenarioPlanner<'_>, table: usize) -> Vec> { + let table_plan = planner.table_plan(table); + let cols = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .filter(|(_, column)| is_range_compatible(&column.ty)) + .map(|(idx, _)| idx as u16) + .collect::>(); + let mut candidates = Vec::new(); + for width in 1..=cols.len().min(3) { + let candidate = cols[..width].to_vec(); + if !table_plan.extra_indexes.contains(&candidate) { + candidates.push(candidate); + } + } + candidates +} + +fn inclusive_bounds_for_rows( + rows: &[SimRow], + cols: &[u16], +) -> Option<(spacetimedb_sats::AlgebraicValue, spacetimedb_sats::AlgebraicValue)> 
{ + let mut sorted = rows.to_vec(); + sorted.sort_by(|lhs, rhs| { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) + }); + let lower = sorted.first()?.project_key(cols).to_algebraic_value(); + let upper = sorted.last()?.project_key(cols).to_algebraic_value(); + Some((lower, upper)) +} + fn emit_query( planner: &mut ScenarioPlanner<'_>, conn: usize, diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index f308639cfb1..40e427f465f 100644 --- a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -3,7 +3,7 @@ use std::ops::Bound; use spacetimedb_sats::AlgebraicValue; use crate::{ - schema::{SchemaPlan, SimRow}, + schema::{ColumnPlan, SchemaPlan, SimRow}, seed::DstRng, }; @@ -30,61 +30,73 @@ pub type TableWorkloadInteraction = PlannedInteraction; #[derive(Clone, Debug, Eq, PartialEq)] pub enum TableOperation { - BeginTx { - conn: usize, - }, - CommitTx { - conn: usize, - }, - RollbackTx { - conn: usize, - }, - Insert { - conn: usize, - table: usize, - row: SimRow, - }, - Delete { - conn: usize, - table: usize, - row: SimRow, - }, - DuplicateInsert { - conn: usize, - table: usize, - row: SimRow, - }, - DeleteMissing { + /// Start an explicit write transaction on a connection. + BeginTx { conn: usize }, + /// Commit the connection's explicit write transaction. + CommitTx { conn: usize }, + /// Roll back the connection's explicit write transaction. + RollbackTx { conn: usize }, + /// Hold a read snapshot open while later reads observe stable state. + BeginReadTx { conn: usize }, + /// Release a previously opened read snapshot. + ReleaseReadTx { conn: usize }, + /// Attempt to start a second writer while another connection owns the write lock. + BeginTxConflict { owner: usize, conn: usize }, + /// Attempt an auto-commit write while another connection owns the write lock. 
+ WriteConflictInsert { + owner: usize, conn: usize, table: usize, row: SimRow, }, + /// Insert a new row with a fresh primary id. + Insert { conn: usize, table: usize, row: SimRow }, + /// Delete an existing visible row. + Delete { conn: usize, table: usize, row: SimRow }, + /// Reinsert an exact row that is already visible. + /// + /// RelationalDB has set semantics for identical rows, so this should be an + /// idempotent no-op rather than a unique-key error. + ExactDuplicateInsert { conn: usize, table: usize, row: SimRow }, + /// Insert a row with an existing primary id but different non-key payload. + /// + /// This is the operation that should fail with `UniqueConstraintViolation`. + UniqueKeyConflictInsert { conn: usize, table: usize, row: SimRow }, + /// Delete a row that is absent from the visible state. + DeleteMissing { conn: usize, table: usize, row: SimRow }, + /// Insert several fresh rows in one interaction. BatchInsert { conn: usize, table: usize, rows: Vec, }, + /// Delete several visible rows in one interaction. BatchDelete { conn: usize, table: usize, rows: Vec, }, - Reinsert { + /// Delete and insert the same row, stressing delete/insert ordering. + Reinsert { conn: usize, table: usize, row: SimRow }, + /// Add a column to an existing table with a default for live rows. + AddColumn { conn: usize, table: usize, - row: SimRow, - }, - PointLookup { - conn: usize, - table: usize, - id: u64, + column: ColumnPlan, + default: AlgebraicValue, }, + /// Add a non-primary index after data exists. + AddIndex { conn: usize, table: usize, cols: Vec }, + /// Query a row by primary id and compare against the model. + PointLookup { conn: usize, table: usize, id: u64 }, + /// Count rows by equality on one column and compare against the model. PredicateCount { conn: usize, table: usize, col: u16, value: AlgebraicValue, }, + /// Scan an indexed range and compare against model filtering. 
RangeScan { conn: usize, table: usize, @@ -92,10 +104,8 @@ pub enum TableOperation { lower: Bound, upper: Bound, }, - FullScan { - conn: usize, - table: usize, - }, + /// Scan all visible rows and compare against the model. + FullScan { conn: usize, table: usize }, } #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -108,6 +118,7 @@ pub enum ExpectedResult { pub enum ExpectedErrorKind { UniqueConstraintViolation, MissingRow, + WriteConflict, } impl PlannedInteraction { @@ -137,6 +148,33 @@ impl PlannedInteraction { Self::ok(TableOperation::RollbackTx { conn }) } + pub fn begin_read_tx(conn: usize) -> Self { + Self::ok(TableOperation::BeginReadTx { conn }) + } + + pub fn release_read_tx(conn: usize) -> Self { + Self::ok(TableOperation::ReleaseReadTx { conn }) + } + + pub fn begin_tx_conflict(owner: usize, conn: usize) -> Self { + Self::expected_err( + TableOperation::BeginTxConflict { owner, conn }, + ExpectedErrorKind::WriteConflict, + ) + } + + pub fn write_conflict_insert(owner: usize, conn: usize, table: usize, row: SimRow) -> Self { + Self::expected_err( + TableOperation::WriteConflictInsert { + owner, + conn, + table, + row, + }, + ExpectedErrorKind::WriteConflict, + ) + } + pub fn insert(conn: usize, table: usize, row: SimRow) -> Self { Self::ok(TableOperation::Insert { conn, table, row }) } @@ -145,9 +183,13 @@ impl PlannedInteraction { Self::ok(TableOperation::Delete { conn, table, row }) } - pub fn duplicate_insert(conn: usize, table: usize, row: SimRow) -> Self { + pub fn exact_duplicate_insert(conn: usize, table: usize, row: SimRow) -> Self { + Self::ok(TableOperation::ExactDuplicateInsert { conn, table, row }) + } + + pub fn unique_key_conflict_insert(conn: usize, table: usize, row: SimRow) -> Self { Self::expected_err( - TableOperation::DuplicateInsert { conn, table, row }, + TableOperation::UniqueKeyConflictInsert { conn, table, row }, ExpectedErrorKind::UniqueConstraintViolation, ) } @@ -171,6 +213,19 @@ impl PlannedInteraction { 
Self::ok(TableOperation::Reinsert { conn, table, row }) } + pub fn add_column(conn: usize, table: usize, column: ColumnPlan, default: AlgebraicValue) -> Self { + Self::ok(TableOperation::AddColumn { + conn, + table, + column, + default, + }) + } + + pub fn add_index(conn: usize, table: usize, cols: Vec) -> Self { + Self::ok(TableOperation::AddIndex { conn, table, cols }) + } + pub fn point_lookup(conn: usize, table: usize, id: u64) -> Self { Self::ok(TableOperation::PointLookup { conn, table, id }) } diff --git a/crates/dst/tests/madsim_axum_reqwest.rs b/crates/dst/tests/madsim_axum_reqwest.rs deleted file mode 100644 index ce97606bd60..00000000000 --- a/crates/dst/tests/madsim_axum_reqwest.rs +++ /dev/null @@ -1,36 +0,0 @@ -use std::{net::SocketAddr, time::Duration}; - -use axum::{routing::get, Router}; - -#[test] -fn axum_server_reqwest_client_over_madsim_tcp() { - let runtime = madsim::runtime::Runtime::with_seed_and_config(1, madsim::Config::default()); - let server_addr: SocketAddr = "10.0.0.1:3000".parse().unwrap(); - let client_addr: SocketAddr = "10.0.0.2:0".parse().unwrap(); - - let server = runtime.create_node().ip(server_addr.ip()).build(); - let client = runtime.create_node().ip(client_addr.ip()).build(); - let ready = std::sync::Arc::new(tokio::sync::Barrier::new(2)); - - let server_ready = ready.clone(); - server.spawn(async move { - let app = Router::new().route("/ping", get(|| async { "pong" })); - let listener = tokio::net::TcpListener::bind(server_addr).await.unwrap(); - server_ready.wait().await; - axum::serve(listener, app).await.unwrap(); - }); - - let client_task = client.spawn(async move { - ready.wait().await; - let url = format!("http://{server_addr}/ping"); - let body = reqwest::get(url).await.unwrap().text().await.unwrap(); - assert_eq!(body, "pong"); - }); - - runtime.block_on(async move { - tokio::time::timeout(Duration::from_secs(5), client_task) - .await - .unwrap() - .unwrap(); - }); -} diff --git a/crates/dst/tests/madsim_tcp.rs 
b/crates/dst/tests/madsim_tcp.rs new file mode 100644 index 00000000000..06574c43965 --- /dev/null +++ b/crates/dst/tests/madsim_tcp.rs @@ -0,0 +1,39 @@ +#![cfg(madsim)] + +use std::{net::SocketAddr, sync::Arc}; + +use tokio::{ + io::{AsyncReadExt, AsyncWriteExt}, + sync::Barrier, +}; + +#[test] +fn tcp_round_trip_over_madsim_tokio() { + let runtime = madsim::runtime::Runtime::new(); + let server_addr: SocketAddr = "10.0.0.1:1".parse().unwrap(); + let client_addr: SocketAddr = "10.0.0.2:1".parse().unwrap(); + + let server = runtime.create_node().ip(server_addr.ip()).build(); + let client = runtime.create_node().ip(client_addr.ip()).build(); + let ready = Arc::new(Barrier::new(2)); + + let server_ready = ready.clone(); + let server_task = server.spawn(async move { + let listener = tokio::net::TcpListener::bind(server_addr).await.unwrap(); + server_ready.wait().await; + let (mut stream, _) = listener.accept().await.unwrap(); + stream.write_all(b"pong").await.unwrap(); + stream.flush().await.unwrap(); + }); + + let client_task = client.spawn(async move { + ready.wait().await; + let mut stream = tokio::net::TcpStream::connect(server_addr).await.unwrap(); + let mut response = [0; 4]; + stream.read_exact(&mut response).await.unwrap(); + assert_eq!(&response, b"pong"); + }); + + runtime.block_on(server_task).unwrap(); + runtime.block_on(client_task).unwrap(); +} diff --git a/crates/io/Cargo.toml b/crates/io/Cargo.toml new file mode 100644 index 00000000000..02b6482302f --- /dev/null +++ b/crates/io/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "spacetimedb-io" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license-file = "LICENSE" +description = "Filesystem and network IO facade for SpacetimeDB crates" + +[dependencies] +tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } + +[lints] +workspace = true diff --git a/crates/io/LICENSE b/crates/io/LICENSE new file mode 120000 index 
00000000000..8540cf8a991 --- /dev/null +++ b/crates/io/LICENSE @@ -0,0 +1 @@ +../../licenses/BSL.txt \ No newline at end of file diff --git a/crates/io/src/lib.rs b/crates/io/src/lib.rs new file mode 100644 index 00000000000..1640d961cc8 --- /dev/null +++ b/crates/io/src/lib.rs @@ -0,0 +1,73 @@ +//! Narrow facade for SpacetimeDB-owned async IO boundaries. +//! +//! Production builds use Tokio through the `madsim-tokio` compatibility crate. +//! Builds compiled with `--cfg madsim` use the simulator implementations exposed +//! by that same compatibility crate. +//! +//! This crate is intentionally small. It is a migration point for filesystem and +//! network APIs reached by deterministic simulation tests, not a general runtime +//! abstraction for tasks, clocks, blocking work, or shutdown. + +pub mod fs { + pub use tokio::fs::*; + + #[cfg(madsim)] + use std::{ + io::{self, Read as _}, + pin::Pin, + task::{Context, Poll}, + }; + + /// Async reader type returned by [`file_from_std`]. + #[cfg(not(madsim))] + pub type FileFromStd = tokio::fs::File; + + /// Async reader type returned by [`file_from_std`]. + #[cfg(madsim)] + pub type FileFromStd = StdFileAsyncReader; + + /// Convert a standard file handle into an async reader. + /// + /// Tokio supports this directly. The madsim filesystem type does not wrap + /// existing OS files, so madsim builds use a small `AsyncRead` adapter for + /// call sites that only need to stream an already-opened std file. + #[cfg(not(madsim))] + pub fn file_from_std(file: std::fs::File) -> FileFromStd { + tokio::fs::File::from_std(file) + } + + /// Convert a standard file handle into an async reader. + #[cfg(madsim)] + pub fn file_from_std(file: std::fs::File) -> FileFromStd { + StdFileAsyncReader(file) + } + + /// Async-read adapter for standard files in madsim builds. 
+ #[cfg(madsim)] + pub struct StdFileAsyncReader(std::fs::File); + + #[cfg(madsim)] + impl tokio::io::AsyncRead for StdFileAsyncReader { + fn poll_read( + mut self: Pin<&mut Self>, + _cx: &mut Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> Poll> { + match self.0.read(buf.initialize_unfilled()) { + Ok(n) => { + buf.advance(n); + Poll::Ready(Ok(())) + } + Err(e) => Poll::Ready(Err(e)), + } + } + } +} + +pub mod io { + pub use tokio::io::*; +} + +pub mod net { + pub use tokio::net::*; +} diff --git a/tools/ci/README.md b/tools/ci/README.md index dbe452243f0..4326bab7ea5 100644 --- a/tools/ci/README.md +++ b/tools/ci/README.md @@ -228,6 +228,17 @@ Usage: docs - `--help`: Print help +### `io-boundary` + +**Usage:** +```bash +Usage: io-boundary +``` + +**Options:** + +- `--help`: Print help + ### `help` **Usage:** diff --git a/tools/ci/src/main.rs b/tools/ci/src/main.rs index 3c31c366324..80f9c95b1a7 100644 --- a/tools/ci/src/main.rs +++ b/tools/ci/src/main.rs @@ -278,6 +278,8 @@ enum CiCmd { TypescriptTest, /// Builds the docs site. Docs, + /// Checks that core database crates use SpacetimeDB fs/net IO boundaries. + IoBoundary, } fn run_all_clap_subcommands(skips: &[String]) -> Result<()> { @@ -306,6 +308,78 @@ fn tracked_rs_files_under(path: &str) -> Result> { .collect()) } +fn check_io_boundary() -> Result<()> { + ensure_repo_root()?; + + let mut violations = Vec::new(); + for root in ["crates/datastore", "crates/core"] { + for path in tracked_rs_files_under(root)? 
{ + check_file_for_direct_tokio_fs_net(&path, &mut violations)?; + } + } + + if violations.is_empty() { + return Ok(()); + } + + for violation in &violations { + eprintln!("{violation}"); + } + bail!( + "direct tokio::fs/tokio::net usage is forbidden in crates/datastore and crates/core; use spacetimedb_io::{{fs, net}}" + ); +} + +fn check_file_for_direct_tokio_fs_net(path: &Path, violations: &mut Vec) -> Result<()> { + let contents = fs::read_to_string(path)?; + let mut in_tokio_use_tree = false; + + for (line_idx, line) in contents.lines().enumerate() { + let line_no = line_idx + 1; + let code = line.split("//").next().unwrap_or(line); + + if code.contains("tokio::fs") || code.contains("tokio::net") { + violations.push(format!("{}:{line_no}: direct tokio fs/net path", path.display())); + } + + if in_tokio_use_tree { + if tokio_use_tree_mentions_fs_or_net(code) { + violations.push(format!("{}:{line_no}: direct tokio fs/net import", path.display())); + } + if code.contains("};") { + in_tokio_use_tree = false; + } + continue; + } + + if code.contains("use tokio::{") { + if tokio_use_tree_mentions_fs_or_net(code) { + violations.push(format!("{}:{line_no}: direct tokio fs/net import", path.display())); + } + if !code.contains("};") { + in_tokio_use_tree = true; + } + } + } + + Ok(()) +} + +fn tokio_use_tree_mentions_fs_or_net(code: &str) -> bool { + let mut token = String::new(); + for ch in code.chars() { + if ch == '_' || ch.is_ascii_alphanumeric() { + token.push(ch); + continue; + } + if token == "fs" || token == "net" { + return true; + } + token.clear(); + } + token == "fs" || token == "net" +} + fn run_dlls() -> Result<()> { ensure_repo_root()?; @@ -532,6 +606,7 @@ fn main() -> Result<()> { Some(CiCmd::Lint) => { ensure_repo_root()?; + check_io_boundary()?; // `cargo fmt --all` only checks files that Cargo discovers through workspace/package targets. 
// However, we also keep Rust sources in a locations that are tracked but not part of our workspace, // so this approach properly catches all the files, where `cargo fmt` does not. @@ -715,6 +790,10 @@ fn main() -> Result<()> { run_docs_build()?; } + Some(CiCmd::IoBoundary) => { + check_io_boundary()?; + } + None => run_all_clap_subcommands(&cli.skip)?, } From 87f97ea0b72d896f00e46a96feb02bd8fe5a28a5 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 4 May 2026 14:25:49 +0530 Subject: [PATCH 24/74] crash property --- Cargo.lock | 1 + crates/dst/Cargo.toml | 1 + crates/dst/README.md | 25 ++- crates/dst/src/client.rs | 114 ++++++++++ crates/dst/src/core/mod.rs | 204 +++++++++++++++++- crates/dst/src/lib.rs | 6 + crates/dst/src/{targets => }/properties.rs | 101 ++++++--- crates/dst/src/targets/mod.rs | 1 - .../src/targets/relational_db_commitlog.rs | 117 +++++----- crates/dst/src/targets/standalone_host.rs | 29 ++- .../dst/src/workload/commitlog_ops/types.rs | 7 +- .../dst/src/workload/module_ops/generation.rs | 2 + crates/dst/src/workload/module_ops/types.rs | 3 + .../dst/src/workload/table_ops/generation.rs | 52 +++-- crates/dst/src/workload/table_ops/model.rs | 83 +++---- .../workload/table_ops/scenarios/banking.rs | 7 +- .../src/workload/table_ops/scenarios/mod.rs | 10 +- .../table_ops/scenarios/random_crud.rs | 18 +- .../dst/src/workload/table_ops/strategies.rs | 9 +- crates/dst/src/workload/table_ops/types.rs | 107 ++++----- 20 files changed, 651 insertions(+), 246 deletions(-) create mode 100644 crates/dst/src/client.rs rename crates/dst/src/{targets => }/properties.rs (90%) diff --git a/Cargo.lock b/Cargo.lock index d3ffccc7d7b..97c4cced8e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8293,6 +8293,7 @@ dependencies = [ "anyhow", "bytes", "clap 4.5.50", + "futures-util", "madsim", "madsim-tokio", "spacetimedb-cli", diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index 95348d1d509..9b64572ed26 100644 --- a/crates/dst/Cargo.toml +++ 
b/crates/dst/Cargo.toml @@ -17,6 +17,7 @@ bench = false [dependencies] anyhow.workspace = true clap.workspace = true +futures-util.workspace = true tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } bytes.workspace = true spacetimedb-cli.workspace = true diff --git a/crates/dst/README.md b/crates/dst/README.md index e0b0a1452a3..f6a8a0cbc8f 100644 --- a/crates/dst/README.md +++ b/crates/dst/README.md @@ -43,6 +43,27 @@ The core contracts are: - `StreamingProperties`: reusable property checks over observations and target accessors. +## Client Model + +DST workloads use shared logical client IDs rather than target-owned ad hoc +connection numbers. A `ClientId` is a stable actor in the generated history; a +`SessionId` is one live connection/session for that actor. A single client can +own multiple active sessions, which matters for reconnect, multi-tab, and future +replication traffic. Targets translate those IDs into their own handles: + +- `relational-db-commitlog` maps `SessionId` to direct write/read transaction + slots. +- `standalone-host` currently maps `SessionId::ZERO` to its host + `ClientConnection`; reducer interactions already carry the logical session so + multi-session host workloads can be added without changing the interaction + shape again. +- future replication targets can map `SessionId` plus endpoint/node IDs to a + client connection routed through the simulated network. + +Concrete handles stay target-owned. Shared workloads should carry logical +identity and lifecycle intent, not `RelTx`, websocket handles, or target-specific +connection objects. + ## Workload Composition DST workloads use three building blocks: @@ -94,7 +115,7 @@ Both targets reuse shared workload families and the same streaming runner. ## Properties -Properties live in `targets/properties.rs` and are selected by target. +Properties live in `src/properties.rs` and are selected by target. 
Table-oriented properties use `TargetPropertyAccess` so the property runtime can ask a target for rows, counts, lookups, and range scans without knowing target storage internals. @@ -176,7 +197,7 @@ Start here: - `src/workload/table_ops`: table interaction language, generation model, and scenarios. - `src/workload/commitlog_ops`: lifecycle layer over table workloads. -- `src/targets/properties.rs`: property catalog and expected model checks. +- `src/properties.rs`: property catalog and expected model checks. - `src/targets/relational_db_commitlog.rs`: target adapter for RelationalDB, commitlog durability, fault injection, close/reopen, and replay. - `src/targets/buggified_repo.rs`: deterministic disk-like fault layer. diff --git a/crates/dst/src/client.rs b/crates/dst/src/client.rs new file mode 100644 index 00000000000..6d4eec570f1 --- /dev/null +++ b/crates/dst/src/client.rs @@ -0,0 +1,114 @@ +//! Logical client and topology identifiers shared by DST workloads and targets. +//! +//! These IDs are part of the generated workload language. Targets translate +//! them into concrete handles such as direct database transaction slots, +//! `ClientConnection`s, websocket sessions, or simulated-node connections. + +use std::fmt; + +/// Stable logical client identity within one DST run. +/// +/// A `ClientId` is an actor/user identity, not a live network connection. One +/// client may own zero, one, or many [`SessionId`]s at the same time. 
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct ClientId(u32); + +impl ClientId { + pub const ZERO: Self = Self(0); + + pub const fn new(raw: u32) -> Self { + Self(raw) + } + + pub const fn from_index(index: usize) -> Self { + Self(index as u32) + } + + pub const fn as_u32(self) -> u32 { + self.0 + } + + pub const fn as_index(self) -> usize { + self.0 as usize + } +} + +impl fmt::Display for ClientId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "client{}", self.0) + } +} + +/// Logical live connection/session for a client. +/// +/// Current single-process targets use `SessionId` anywhere old DST code said +/// "connection": transaction slots, read snapshots, reducer-call handles, and +/// property observations. A target translates this logical session into its +/// concrete handle, such as a `RelTx` slot or `ClientConnection`. +/// +/// The `generation` field is the per-client session ordinal. Workloads can keep +/// several generations active concurrently to model one client with multiple +/// open connections, or allocate a later generation after a reconnect. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct SessionId { + pub client: ClientId, + pub generation: u32, +} + +impl SessionId { + pub const ZERO: Self = Self::new(ClientId::ZERO, 0); + + pub const fn new(client: ClientId, generation: u32) -> Self { + Self { client, generation } + } + + /// Compatibility helper for today's fixed-size session pools. + /// + /// A run with `N` connections starts as one logical client with `N` + /// sessions: `client0/session0`, `client0/session1`, ... 
+ pub(crate) const fn from_index(index: usize) -> Self { + Self::new(ClientId::ZERO, index as u32) + } + + pub(crate) const fn as_index(self) -> usize { + self.generation as usize + } +} + +impl fmt::Display for SessionId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}.session{}", self.client, self.generation) + } +} + +/// Logical server endpoint used by future client/network/replication workloads. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct EndpointId(u32); + +impl EndpointId { + pub const ZERO: Self = Self(0); + + pub const fn new(raw: u32) -> Self { + Self(raw) + } + + pub const fn as_u32(self) -> u32 { + self.0 + } +} + +/// Logical node identifier for future replication and multi-node targets. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct NodeId(u32); + +impl NodeId { + pub const ZERO: Self = Self(0); + + pub const fn new(raw: u32) -> Self { + Self(raw) + } + + pub const fn as_u32(self) -> u32 { + self.0 + } +} diff --git a/crates/dst/src/core/mod.rs b/crates/dst/src/core/mod.rs index 34e8a75e85c..69ba1810284 100644 --- a/crates/dst/src/core/mod.rs +++ b/crates/dst/src/core/mod.rs @@ -1,8 +1,14 @@ //! Core abstractions for pluggable DST workloads, engines, and properties. -use std::future::Future; +use std::{ + any::Any, + fmt::Debug, + future::Future, + panic::{self, AssertUnwindSafe}, +}; use crate::config::RunConfig; +use futures_util::FutureExt; /// Pull-based deterministic interaction source. 
pub trait NextInteractionSource { @@ -43,7 +49,7 @@ pub async fn run_streaming( cfg: RunConfig, ) -> anyhow::Result where - I: Clone, + I: Clone + Debug, S: NextInteractionSource, E: TargetEngine, P: StreamingProperties, @@ -60,19 +66,201 @@ where let Some(interaction) = source.next_interaction() else { break; }; - let observation = engine - .execute_interaction(&interaction) - .await - .map_err(|e| anyhow::anyhow!("interaction execution failed at step {step}: {e}"))?; + let execution = guard_target("execute_interaction", step, Some(&interaction), || { + engine.execute_interaction(&interaction) + }) + .await + .map_err(|e| anyhow::anyhow!("property violation at step {step}: {e}"))?; + let observation = execution.map_err(|e| anyhow::anyhow!("interaction execution failed at step {step}: {e}"))?; properties .observe(&engine, &interaction, &observation) .map_err(|e| anyhow::anyhow!("property violation at step {step}: {e}"))?; step = step.saturating_add(1); } - engine.finish(); - let outcome = engine.collect_outcome().await?; + guard_target("finish", step, Option::<&I>::None, || async { + engine.finish(); + }) + .await + .map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?; + let outcome = guard_target("collect_outcome", step, Option::<&I>::None, || engine.collect_outcome()) + .await + .map_err(|e| anyhow::anyhow!("property violation while collecting outcome: {e}"))??; properties .finish(&engine, &outcome) .map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?; Ok(outcome) } + +async fn guard_target( + phase: &'static str, + step: usize, + interaction: Option<&I>, + make_future: impl FnOnce() -> Fut, +) -> Result +where + I: Debug, + Fut: Future, +{ + let future = panic::catch_unwind(AssertUnwindSafe(make_future)) + .map_err(|payload| not_crash_error(phase, step, interaction, &payload))?; + AssertUnwindSafe(future) + .catch_unwind() + .await + .map_err(|payload| not_crash_error(phase, step, interaction, &payload)) +} + +fn not_crash_error( + 
phase: &'static str, + step: usize, + interaction: Option<&I>, + payload: &Box, +) -> String { + let payload = panic_payload_to_string(payload); + match interaction { + Some(interaction) => { + format!("[NotCrash] target panicked during {phase} at step {step}: interaction={interaction:?}, payload={payload}") + } + None => format!("[NotCrash] target panicked during {phase} after step {step}: payload={payload}"), + } +} + +fn panic_payload_to_string(payload: &Box) -> String { + if let Some(message) = payload.downcast_ref::<&'static str>() { + (*message).to_string() + } else if let Some(message) = payload.downcast_ref::() { + message.clone() + } else { + "".to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Clone, Debug)] + struct TestInteraction; + + struct SingleStepSource { + emitted: bool, + } + + impl SingleStepSource { + fn new() -> Self { + Self { emitted: false } + } + } + + impl NextInteractionSource for SingleStepSource { + type Interaction = TestInteraction; + + fn next_interaction(&mut self) -> Option { + if self.emitted { + None + } else { + self.emitted = true; + Some(TestInteraction) + } + } + + fn request_finish(&mut self) {} + } + + #[derive(Clone, Copy, Debug, Eq, PartialEq)] + enum PanicPhase { + Execute, + Finish, + CollectOutcome, + } + + struct PanicEngine { + phase: PanicPhase, + } + + impl PanicEngine { + fn new(phase: PanicPhase) -> Self { + Self { phase } + } + } + + impl TargetEngine for PanicEngine { + type Observation = (); + type Outcome = (); + type Error = String; + + fn execute_interaction<'a>( + &'a mut self, + _interaction: &'a TestInteraction, + ) -> impl Future> + 'a { + async move { + if self.phase == PanicPhase::Execute { + panic!("execute panic"); + } + Ok(()) + } + } + + fn finish(&mut self) { + if self.phase == PanicPhase::Finish { + panic!("finish panic"); + } + } + + fn collect_outcome<'a>(&'a mut self) -> impl Future> + 'a { + async move { + if self.phase == PanicPhase::CollectOutcome { + 
panic!("collect panic"); + } + Ok(()) + } + } + } + + struct NoopProperties; + + impl StreamingProperties for NoopProperties { + fn observe( + &mut self, + _engine: &PanicEngine, + _interaction: &TestInteraction, + _observation: &(), + ) -> Result<(), String> { + Ok(()) + } + + fn finish(&mut self, _engine: &PanicEngine, _outcome: &()) -> Result<(), String> { + Ok(()) + } + } + + #[tokio::test] + async fn not_crash_catches_execute_panic() { + assert_not_crash_error(PanicPhase::Execute, "execute_interaction", "execute panic").await; + } + + #[tokio::test] + async fn not_crash_catches_finish_panic() { + assert_not_crash_error(PanicPhase::Finish, "finish", "finish panic").await; + } + + #[tokio::test] + async fn not_crash_catches_collect_outcome_panic() { + assert_not_crash_error(PanicPhase::CollectOutcome, "collect_outcome", "collect panic").await; + } + + async fn assert_not_crash_error(phase: PanicPhase, expected_phase: &str, expected_payload: &str) { + let err = run_streaming( + SingleStepSource::new(), + PanicEngine::new(phase), + NoopProperties, + RunConfig::with_max_interactions(1), + ) + .await + .unwrap_err() + .to_string(); + + assert!(err.contains("[NotCrash]")); + assert!(err.contains(expected_phase)); + assert!(err.contains(expected_payload)); + } +} diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index aa556ac000b..f66a8fd8dc9 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -2,7 +2,9 @@ //! //! Public surface is intentionally narrow and centered on the CLI: //! +//! - [`client`] for logical client/session identifiers, //! - [`config`] for run budgets, +//! - [`properties`] for reusable semantic checks, //! - [`seed`] for deterministic seeds, //! - [`workload`] for scenario identifiers, //! - [`targets`] for executable relational-db / standalone-host adapters. @@ -30,10 +32,14 @@ //! 7. Shared randomness, weighting, and sampling helpers belong in the //! workload strategy module, not in ad hoc target or scenario code. 
+/// Logical client/session identifiers shared by workloads and targets. +pub mod client; /// Shared run-budget configuration for DST targets. pub mod config; /// Core traits/runners for pluggable workloads and targets. pub mod core; +/// Reusable semantic properties and expected-model checks. +pub(crate) mod properties; mod schema; /// Stable seed and RNG utilities used to make runs reproducible. pub mod seed; diff --git a/crates/dst/src/targets/properties.rs b/crates/dst/src/properties.rs similarity index 90% rename from crates/dst/src/targets/properties.rs rename to crates/dst/src/properties.rs index 0051bc6416f..6550ef8b87b 100644 --- a/crates/dst/src/targets/properties.rs +++ b/crates/dst/src/properties.rs @@ -1,13 +1,40 @@ -//! Target-level property runtime shared by table-oriented targets. +//! Reusable property runtime shared by DST targets. //! -//! Properties are defined once here and plugged into any target that -//! implements [`TargetPropertyAccess`]. +//! This module is the boundary between target execution and semantic checking. +//! Targets emit observations and implement [`TargetPropertyAccess`]; property +//! rules compare those observations against either the target's externally +//! visible state, an expected model, or durable replay state. +//! +//! ## Property Catalog +//! +//! - `InsertSelect`: a row inserted by a session is immediately visible to that +//! same session. +//! - `DeleteSelect`: a row deleted by a session is no longer visible to that +//! same session. +//! - `SelectSelectOptimizer`: a NoREC-style check comparing indexed/filter +//! query results with a direct row projection. +//! - `WhereTrueFalseNull`: a TLP-style partition check for boolean predicates. +//! - `NotCrash`: target interactions, finish, and outcome collection must not +//! panic. This is enforced by the shared streaming runner. +//! - `IndexRangeExcluded`: range scans over composite indexes obey excluded +//! upper bounds. +//! 
- `BankingTablesMatch`: scenario-level shadow tables stay identical. +//! - `DynamicMigrationAutoInc`: migrated dynamic tables keep advancing integer +//! IDs after schema changes. +//! - `DurableReplayMatchesModel`: replayed durable state matches the expected +//! committed model. +//! - `ExpectedErrorMatches`: generated expected failures are the failures the +//! target actually reports. +//! - `PointLookupMatchesModel`, `PredicateCountMatchesModel`, +//! `RangeScanMatchesModel`, and `FullScanMatchesModel`: query observations +//! match the expected visibility model for the acting session. use std::ops::Bound; use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; use crate::{ + client::SessionId, core::StreamingProperties, schema::{SchemaPlan, SimRow}, workload::{ @@ -22,7 +49,7 @@ use crate::{ /// Target adapter for property evaluation. pub(crate) trait TargetPropertyAccess { fn schema_plan(&self) -> &SchemaPlan; - fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String>; + fn lookup_in_connection(&self, conn: SessionId, table: usize, id: u64) -> Result, String>; fn collect_rows_for_table(&self, table: usize) -> Result, String>; fn count_rows(&self, table: usize) -> Result; fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result; @@ -38,18 +65,33 @@ pub(crate) trait TargetPropertyAccess { /// Canonical property IDs that can be selected by targets. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum PropertyKind { + /// Target execution must not panic. Enforced by the shared streaming runner. + NotCrash, + /// Inserted rows are visible to the inserting session. InsertSelect, + /// Deleted rows disappear from the deleting session's view. DeleteSelect, + /// Optimized predicate counts agree with direct row projection. SelectSelectOptimizer, + /// Boolean partitions preserve total cardinality. WhereTrueFalseNull, + /// Composite index range scans implement excluded upper bounds correctly. 
IndexRangeExcluded, + /// Banking scenario debit and credit shadow tables remain identical. BankingTablesMatch, + /// Auto-increment IDs continue advancing after dynamic table migration. DynamicMigrationAutoInc, + /// Durable replay state equals the expected committed model. DurableReplayMatchesModel, + /// Expected-error interactions fail with the expected error class. ExpectedErrorMatches, + /// Point lookups match the expected session-visible model. PointLookupMatchesModel, + /// Predicate counts match the expected session-visible model. PredicateCountMatchesModel, + /// Range scans match the expected session-visible model. RangeScanMatchesModel, + /// Full scans match the expected session-visible model. FullScanMatchesModel, } @@ -66,33 +108,33 @@ pub(crate) struct DynamicMigrationProbe { pub(crate) enum TableObservation { Applied, RowInserted { - conn: usize, + conn: SessionId, table: usize, row: SimRow, in_tx: bool, }, RowDeleted { - conn: usize, + conn: SessionId, table: usize, row: SimRow, in_tx: bool, }, ExpectedError(ExpectedErrorKind), PointLookup { - conn: usize, + conn: SessionId, table: usize, id: u64, actual: Option, }, PredicateCount { - conn: usize, + conn: SessionId, table: usize, col: u16, value: AlgebraicValue, actual: usize, }, RangeScan { - conn: usize, + conn: SessionId, table: usize, cols: Vec, lower: Bound, @@ -100,7 +142,7 @@ pub(crate) enum TableObservation { actual: Vec, }, FullScan { - conn: usize, + conn: SessionId, table: usize, actual: Vec, }, @@ -135,13 +177,13 @@ pub(crate) struct PropertyContext<'a> { pub(crate) enum PropertyEvent<'a> { TableInteractionApplied, RowInserted { - conn: usize, + conn: SessionId, table: usize, row: &'a SimRow, in_tx: bool, }, RowDeleted { - conn: usize, + conn: SessionId, table: usize, row: &'a SimRow, in_tx: bool, @@ -151,20 +193,20 @@ pub(crate) enum PropertyEvent<'a> { interaction: &'a TableWorkloadInteraction, }, PointLookup { - conn: usize, + conn: SessionId, table: usize, id: u64, actual: &'a 
Option, }, PredicateCount { - conn: usize, + conn: SessionId, table: usize, col: u16, value: &'a AlgebraicValue, actual: usize, }, RangeScan { - conn: usize, + conn: SessionId, table: usize, cols: &'a [u16], lower: &'a Bound, @@ -172,7 +214,7 @@ pub(crate) enum PropertyEvent<'a> { actual: &'a [SimRow], }, FullScan { - conn: usize, + conn: SessionId, table: usize, actual: &'a [SimRow], }, @@ -205,17 +247,17 @@ impl TableModel { self.expected.clone().committed_rows() } - pub fn lookup_by_id(&self, conn: usize, table: usize, id: u64) -> Option { + pub fn lookup_by_id(&self, conn: SessionId, table: usize, id: u64) -> Option { self.expected.lookup_by_id(conn, table, id) } - pub fn predicate_count(&self, conn: usize, table: usize, col: u16, value: &AlgebraicValue) -> usize { + pub fn predicate_count(&self, conn: SessionId, table: usize, col: u16, value: &AlgebraicValue) -> usize { self.expected.predicate_count(conn, table, col, value) } pub fn range_scan( &self, - conn: usize, + conn: SessionId, table: usize, cols: &[u16], lower: &Bound, @@ -224,7 +266,7 @@ impl TableModel { self.expected.range_scan(conn, table, cols, lower, upper) } - pub fn full_scan(&self, conn: usize, table: usize) -> Vec { + pub fn full_scan(&self, conn: SessionId, table: usize) -> Vec { let mut rows = self.expected.visible_rows(conn, table); rows.sort_by_key(|row| row.id().unwrap_or_default()); rows @@ -242,6 +284,7 @@ impl PropertyRuntime { let mut rules: Vec = Vec::with_capacity(kinds.len()); for kind in kinds { match kind { + PropertyKind::NotCrash => rules.push(RuleEntry::new(*kind, Box::::default())), PropertyKind::InsertSelect => rules.push(RuleEntry::new(*kind, Box::::default())), PropertyKind::DeleteSelect => rules.push(RuleEntry::new(*kind, Box::::default())), PropertyKind::SelectSelectOptimizer => rules.push(RuleEntry::new(*kind, Box::::default())), @@ -339,7 +382,7 @@ impl PropertyRuntime { &mut self, access: &dyn TargetPropertyAccess, _step: u64, - conn: usize, + conn: SessionId, table: 
usize, row: &SimRow, in_tx: bool, @@ -368,7 +411,7 @@ impl PropertyRuntime { &mut self, access: &dyn TargetPropertyAccess, _step: u64, - conn: usize, + conn: SessionId, table: usize, row: &SimRow, in_tx: bool, @@ -420,7 +463,7 @@ impl PropertyRuntime { pub fn on_point_lookup( &mut self, access: &dyn TargetPropertyAccess, - conn: usize, + conn: SessionId, table: usize, id: u64, actual: &Option, @@ -446,7 +489,7 @@ impl PropertyRuntime { pub fn on_predicate_count( &mut self, access: &dyn TargetPropertyAccess, - conn: usize, + conn: SessionId, table: usize, col: u16, value: &AlgebraicValue, @@ -475,7 +518,7 @@ impl PropertyRuntime { pub fn on_range_scan( &mut self, access: &dyn TargetPropertyAccess, - conn: usize, + conn: SessionId, table: usize, cols: &[u16], lower: &Bound, @@ -505,7 +548,7 @@ impl PropertyRuntime { pub fn on_full_scan( &mut self, access: &dyn TargetPropertyAccess, - conn: usize, + conn: SessionId, table: usize, actual: &[SimRow], ) -> Result<(), String> { @@ -686,6 +729,7 @@ impl RuleEntry { impl Default for PropertyRuntime { fn default() -> Self { Self::with_kinds(&[ + PropertyKind::NotCrash, PropertyKind::InsertSelect, PropertyKind::DeleteSelect, PropertyKind::SelectSelectOptimizer, @@ -711,6 +755,11 @@ trait PropertyRule { } } +#[derive(Default)] +struct NotCrashRule; + +impl PropertyRule for NotCrashRule {} + struct ExpectedTableStateRule { scenario: S, schema: SchemaPlan, diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index e61f19fc3eb..a619c1a8be5 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -2,6 +2,5 @@ pub(crate) mod buggified_repo; pub mod descriptor; -pub(crate) mod properties; pub mod relational_db_commitlog; pub mod standalone_host; diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 392e034aad0..9644011e547 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ 
b/crates/dst/src/targets/relational_db_commitlog.rs @@ -1,6 +1,6 @@ //! RelationalDB DST target with mocked commitlog file chaos and replay checks. -use std::{cell::Cell, collections::BTreeMap, io, num::NonZeroU64, ops::Bound, panic::AssertUnwindSafe, sync::Arc}; +use std::{cell::Cell, collections::BTreeMap, io, num::NonZeroU64, ops::Bound, sync::Arc}; use spacetimedb_commitlog::repo::{Memory as MemoryCommitlogRepo, SizeOnDisk}; use spacetimedb_core::{ @@ -28,14 +28,15 @@ use spacetimedb_table::page_pool::PagePool; use tracing::{debug, info, trace}; use crate::{ + client::SessionId, config::{CommitlogFaultProfile, RunConfig}, core::{self, TargetEngine}, + properties::{ + CommitlogObservation, DynamicMigrationProbe, PropertyRuntime, TableObservation, TargetPropertyAccess, + }, schema::{SchemaPlan, SimRow}, seed::DstSeed, targets::buggified_repo::{is_injected_disk_error_text, BuggifiedRepo, CommitlogFaultConfig}, - targets::properties::{ - CommitlogObservation, DynamicMigrationProbe, PropertyRuntime, TableObservation, TargetPropertyAccess, - }, workload::{ commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary}, commitlog_ops::{InteractionSummary, RuntimeSummary, SchemaSummary, TableOperationSummary, TransactionSummary}, @@ -448,17 +449,9 @@ impl RelationalDbEngine { } fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result { - match std::panic::catch_unwind(AssertUnwindSafe(|| self.execute_table_op_inner(interaction))) { - Ok(Ok(observation)) => { - self.stats.record_table_operation(&interaction.op); - Ok(observation) - } - Ok(Err(err)) => Err(err), - Err(payload) => Err(format!( - "[DatastoreNeverPanics] interaction panicked: interaction={interaction:?}, payload={}", - panic_payload_to_string(&payload) - )), - } + let observation = self.execute_table_op_inner(interaction)?; + self.stats.record_table_operation(&interaction.op); + Ok(observation) } fn execute_table_op_inner(&mut self, interaction: 
&TableWorkloadInteraction) -> Result { @@ -466,10 +459,10 @@ impl RelationalDbEngine { match &interaction.op { TableOperation::BeginTx { conn } => { self.execution.ensure_known_connection(*conn)?; - if self.read_tx_by_connection[*conn].is_some() { + if self.read_tx_by_connection[conn.as_index()].is_some() { return Err(format!("connection {conn} already has open read transaction")); } - if self.execution.tx_by_connection[*conn].is_some() { + if self.execution.tx_by_connection[conn.as_index()].is_some() { return Err(format!("connection {conn} already has open transaction")); } if let Some(owner) = self.execution.active_writer { @@ -477,7 +470,7 @@ impl RelationalDbEngine { "connection {conn} cannot begin write transaction while connection {owner} owns lock" )); } - self.execution.tx_by_connection[*conn] = Some( + self.execution.tx_by_connection[conn.as_index()] = Some( self.db()? .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests), ); @@ -487,20 +480,20 @@ impl RelationalDbEngine { } TableOperation::BeginReadTx { conn } => { self.execution.ensure_known_connection(*conn)?; - if self.execution.tx_by_connection[*conn].is_some() { + if self.execution.tx_by_connection[conn.as_index()].is_some() { return Err(format!("connection {conn} already has open write transaction")); } - if self.read_tx_by_connection[*conn].is_some() { + if self.read_tx_by_connection[conn.as_index()].is_some() { return Err(format!("connection {conn} already has open read transaction")); } let tx = self.db()?.begin_tx(Workload::ForTests); - self.read_tx_by_connection[*conn] = Some(tx); + self.read_tx_by_connection[conn.as_index()] = Some(tx); self.stats.record_read_tx(); Ok(TableObservation::Applied) } TableOperation::ReleaseReadTx { conn } => { self.execution.ensure_known_connection(*conn)?; - let tx = self.read_tx_by_connection[*conn] + let tx = self.read_tx_by_connection[conn.as_index()] .take() .ok_or_else(|| format!("connection {conn} has no read transaction to release"))?; let _ = 
self.db()?.release_tx(tx); @@ -535,7 +528,7 @@ impl RelationalDbEngine { } TableOperation::CommitTx { conn } => { self.execution.ensure_writer_owner(*conn, "commit")?; - let tx = self.execution.tx_by_connection[*conn] + let tx = self.execution.tx_by_connection[conn.as_index()] .take() .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; let committed = self @@ -549,7 +542,7 @@ impl RelationalDbEngine { } TableOperation::RollbackTx { conn } => { self.execution.ensure_writer_owner(*conn, "rollback")?; - let tx = self.execution.tx_by_connection[*conn] + let tx = self.execution.tx_by_connection[conn.as_index()] .take() .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; let _ = self.db()?.rollback_mut_tx(tx); @@ -558,7 +551,7 @@ impl RelationalDbEngine { Ok(TableObservation::CommitOrRollback) } TableOperation::Insert { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[*conn].is_some(); + let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); let inserted_row = self.with_mut_tx(*conn, |engine, tx| { let table_id = *engine .base_table_ids @@ -582,7 +575,7 @@ impl RelationalDbEngine { }) } TableOperation::Delete { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[*conn].is_some(); + let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); self.with_mut_tx(*conn, |engine, tx| { let table_id = *engine .base_table_ids @@ -605,7 +598,7 @@ impl RelationalDbEngine { }) } TableOperation::ExactDuplicateInsert { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[*conn].is_some(); + let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); let before = self.collect_rows_in_connection(*conn, *table)?; let inserted_row = self.with_mut_tx(*conn, |engine, tx| { let table_id = engine.table_id_for_index(*table)?; @@ -672,7 +665,7 @@ impl RelationalDbEngine { } } TableOperation::BatchInsert { conn, table, rows } => { - let in_tx = 
self.execution.tx_by_connection[*conn].is_some(); + let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); self.with_mut_tx(*conn, |engine, tx| { let table_id = *engine .base_table_ids @@ -693,7 +686,7 @@ impl RelationalDbEngine { Ok(TableObservation::Applied) } TableOperation::BatchDelete { conn, table, rows } => { - let in_tx = self.execution.tx_by_connection[*conn].is_some(); + let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); self.with_mut_tx(*conn, |engine, tx| { let table_id = *engine .base_table_ids @@ -713,7 +706,7 @@ impl RelationalDbEngine { Ok(TableObservation::Applied) } TableOperation::Reinsert { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[*conn].is_some(); + let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); self.with_mut_tx(*conn, |engine, tx| { let table_id = *engine .base_table_ids @@ -840,19 +833,19 @@ impl RelationalDbEngine { fn with_mut_tx( &mut self, - conn: usize, + conn: SessionId, mut f: impl FnMut(&mut Self, &mut RelMutTx) -> Result, ) -> Result { self.execution.ensure_known_connection(conn)?; - if self.read_tx_by_connection[conn].is_some() { + if self.read_tx_by_connection[conn.as_index()].is_some() { return Err(format!("connection {conn} cannot write while read transaction is open")); } - if self.execution.tx_by_connection[conn].is_some() { - let mut tx = self.execution.tx_by_connection[conn] + if self.execution.tx_by_connection[conn.as_index()].is_some() { + let mut tx = self.execution.tx_by_connection[conn.as_index()] .take() .ok_or_else(|| format!("connection {conn} missing transaction handle"))?; let value = f(self, &mut tx)?; - self.execution.tx_by_connection[conn] = Some(tx); + self.execution.tx_by_connection[conn.as_index()] = Some(tx); return Ok(value); } @@ -877,7 +870,7 @@ impl RelationalDbEngine { Ok(value) } - fn expect_write_conflict(&self, owner: usize, conn: usize) -> Result<(), String> { + fn expect_write_conflict(&self, owner: 
SessionId, conn: SessionId) -> Result<(), String> { self.execution.ensure_known_connection(owner)?; self.execution.ensure_known_connection(conn)?; if owner == conn { @@ -889,7 +882,7 @@ impl RelationalDbEngine { self.execution.active_writer )); } - if self.read_tx_by_connection[conn].is_some() { + if self.read_tx_by_connection[conn.as_index()].is_some() { return Err(format!( "conflicting connection {conn} unexpectedly has a read transaction" )); @@ -897,7 +890,7 @@ impl RelationalDbEngine { Ok(()) } - fn create_dynamic_table(&mut self, conn: usize, slot: u32) -> Result { + fn create_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { if self.execution.active_writer.is_some() { trace!( step = self.step, @@ -907,7 +900,7 @@ impl RelationalDbEngine { return Ok(CommitlogObservation::Skipped); } let conn = self.normalize_conn(conn); - debug!(step = self.step, conn, slot, "create dynamic table"); + debug!(step = self.step, conn = %conn, slot, "create dynamic table"); self.with_mut_tx(conn, |engine, tx| { if engine.dynamic_tables.contains_key(&slot) { return Ok(()); @@ -940,7 +933,7 @@ impl RelationalDbEngine { Ok(CommitlogObservation::Applied) } - fn drop_dynamic_table(&mut self, conn: usize, slot: u32) -> Result { + fn drop_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { if self.execution.active_writer.is_some() { trace!( step = self.step, @@ -950,7 +943,7 @@ impl RelationalDbEngine { return Ok(CommitlogObservation::Skipped); } let conn = self.normalize_conn(conn); - debug!(step = self.step, conn, slot, "drop dynamic table"); + debug!(step = self.step, conn = %conn, slot, "drop dynamic table"); self.with_mut_tx(conn, |engine, tx| { let Some(state) = engine.dynamic_tables.remove(&slot) else { return Ok(()); @@ -967,7 +960,7 @@ impl RelationalDbEngine { Ok(CommitlogObservation::Applied) } - fn migrate_dynamic_table(&mut self, conn: usize, slot: u32) -> Result { + fn migrate_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { if 
self.execution.active_writer.is_some() { trace!( step = self.step, @@ -977,7 +970,7 @@ impl RelationalDbEngine { return Ok(CommitlogObservation::Skipped); } let conn = self.normalize_conn(conn); - debug!(step = self.step, conn, slot, "migrate dynamic table"); + debug!(step = self.step, conn = %conn, slot, "migrate dynamic table"); let probe = self.with_mut_tx(conn, |engine, tx| { let Some(state) = engine.dynamic_tables.get(&slot).cloned() else { return Ok(None); @@ -1028,7 +1021,7 @@ impl RelationalDbEngine { .unwrap_or(CommitlogObservation::Skipped)) } - fn normalize_conn(&self, conn: usize) -> usize { + fn normalize_conn(&self, conn: SessionId) -> SessionId { self.execution.active_writer.unwrap_or(conn) } @@ -1069,16 +1062,16 @@ impl RelationalDbEngine { .ok_or_else(|| format!("table {table} out of range")) } - fn lookup_base_row(&self, conn: usize, table: usize, id: u64) -> Result, String> { + fn lookup_base_row(&self, conn: SessionId, table: usize, id: u64) -> Result, String> { let table_id = self.table_id_for_index(table)?; - if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { Ok(self .db()? .iter_by_col_eq_mut(tx, table_id, 0u16, &AlgebraicValue::U64(id)) .map_err(|err| format!("in-tx lookup failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .next()) - } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn) { + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { Ok(self .db()? 
.iter_by_col_eq(tx, table_id, 0u16, &AlgebraicValue::U64(id)) @@ -1100,9 +1093,9 @@ impl RelationalDbEngine { } } - fn collect_rows_in_connection(&self, conn: usize, table: usize) -> Result, String> { + fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String> { let table_id = self.table_id_for_index(table)?; - if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { let mut rows = self .db()? .iter_mut(tx, table_id) @@ -1111,7 +1104,7 @@ impl RelationalDbEngine { .collect::>(); rows.sort_by_key(|row| row.id().unwrap_or_default()); Ok(rows) - } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn) { + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { let mut rows = self .db()? .iter(tx, table_id) @@ -1127,19 +1120,19 @@ impl RelationalDbEngine { fn count_by_col_eq_in_connection( &self, - conn: usize, + conn: SessionId, table: usize, col: u16, value: &AlgebraicValue, ) -> Result { let table_id = self.table_id_for_index(table)?; - if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { Ok(self .db()? .iter_by_col_eq_mut(tx, table_id, col, value) .map_err(|err| format!("in-tx predicate query failed: {err}"))? .count()) - } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn) { + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { Ok(self .db()? 
.iter_by_col_eq(tx, table_id, col, value) @@ -1152,7 +1145,7 @@ impl RelationalDbEngine { fn range_scan_in_connection( &self, - conn: usize, + conn: SessionId, table: usize, cols: &[u16], lower: Bound, @@ -1160,13 +1153,13 @@ impl RelationalDbEngine { ) -> Result, String> { let table_id = self.table_id_for_index(table)?; let col_list = cols.iter().copied().collect::(); - let mut rows = if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn) { + let mut rows = if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { self.db()? .iter_by_col_range_mut(tx, table_id, col_list, (lower, upper)) .map_err(|err| format!("in-tx range scan failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .collect::>() - } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn) { + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { self.db()? .iter_by_col_range(tx, table_id, col_list, (lower, upper)) .map_err(|err| format!("read-tx range scan failed: {err}"))? 
@@ -1357,7 +1350,7 @@ impl TargetPropertyAccess for RelationalDbEngine { &self.base_schema } - fn lookup_in_connection(&self, conn: usize, table: usize, id: u64) -> Result, String> { + fn lookup_in_connection(&self, conn: SessionId, table: usize, id: u64) -> Result, String> { Self::lookup_base_row(self, conn, table, id) } @@ -1534,16 +1527,6 @@ fn is_unique_constraint_violation(err: &DBError) -> bool { ) } -fn panic_payload_to_string(payload: &Box) -> String { - if let Some(message) = payload.downcast_ref::() { - message.clone() - } else if let Some(message) = payload.downcast_ref::<&'static str>() { - (*message).to_string() - } else { - "".to_string() - } -} - fn compare_rows_for_range(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { lhs.project_key(cols) .to_algebraic_value() diff --git a/crates/dst/src/targets/standalone_host.rs b/crates/dst/src/targets/standalone_host.rs index f58f19d6dac..d7429ef9463 100644 --- a/crates/dst/src/targets/standalone_host.rs +++ b/crates/dst/src/targets/standalone_host.rs @@ -28,6 +28,7 @@ use spacetimedb_standalone::{StandaloneEnv, StandaloneOptions}; use tracing::trace; use crate::{ + client::SessionId, config::RunConfig, core::{self, StreamingProperties, TargetEngine}, seed::DstSeed, @@ -136,9 +137,14 @@ impl StandaloneHostEngine { SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos() ))); let _ = std::fs::remove_dir_all(&root_dir); - let session = open_session(&root_dir, &module, None, connection_id_for_session(seed, 0)) - .await - .map_err(anyhow::Error::msg)?; + let session = open_session( + &root_dir, + &module, + None, + connection_id_for_session(seed, SessionId::ZERO, 0), + ) + .await + .map_err(anyhow::Error::msg)?; Ok(Self { root_dir, session: Some(session), @@ -157,7 +163,10 @@ impl StandaloneHostEngine { async fn execute(&mut self, interaction: &ModuleInteraction) -> Result<(), String> { self.step = self.step.saturating_add(1); match interaction { - ModuleInteraction::CallReducer { reducer, args } 
=> { + ModuleInteraction::CallReducer { session, reducer, args } => { + if *session != SessionId::ZERO { + return Err(format!("standalone-host target has no session for {session}")); + } self.reducer_calls = self.reducer_calls.saturating_add(1); let request_id = (self.step as u32).saturating_sub(1); let product = ProductValue::from_iter(args.iter().cloned()); @@ -202,7 +211,7 @@ impl StandaloneHostEngine { .db_identity; let old = self.session.take(); drop(old); - let connection_id = connection_id_for_session(self.seed, self.session_generation); + let connection_id = connection_id_for_session(self.seed, SessionId::ZERO, self.session_generation); self.session_generation = self.session_generation.saturating_add(1); self.session = Some(open_session(&self.root_dir, &self.module, Some(db_identity), connection_id).await?); @@ -272,9 +281,13 @@ fn is_expected_error(_reducer: &str, msg: &str) -> bool { msg.contains("permission denied") } -fn connection_id_for_session(seed: DstSeed, generation: u64) -> ConnectionId { - let high = seed.fork(1_000 + generation.saturating_mul(2)).0 as u128; - let low = seed.fork(1_001 + generation.saturating_mul(2)).0 as u128; +fn connection_id_for_session(seed: DstSeed, session: SessionId, handle_generation: u64) -> ConnectionId { + let base = 1_000u64 + .saturating_add((session.client.as_u32() as u64).saturating_mul(1_000_000)) + .saturating_add((session.generation as u64).saturating_mul(10_000)) + .saturating_add(handle_generation.saturating_mul(2)); + let high = seed.fork(base).0 as u128; + let low = seed.fork(base.saturating_add(1)).0 as u128; let id = (high << 64) | low; ConnectionId::from_u128(id.max(1)) } diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs index 6cb00c11c44..ece7687de04 100644 --- a/crates/dst/src/workload/commitlog_ops/types.rs +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -1,6 +1,7 @@ //! 
Serializable interaction model for relational-db + commitlog DST. use crate::{ + client::SessionId, config::CommitlogFaultProfile, schema::SimRow, workload::table_ops::{TableWorkloadInteraction, TableWorkloadOutcome}, @@ -12,11 +13,11 @@ pub enum CommitlogInteraction { /// Reused base workload interaction from `table_ops`. Table(TableWorkloadInteraction), /// Create a dynamic user table for a logical slot. - CreateDynamicTable { conn: usize, slot: u32 }, + CreateDynamicTable { conn: SessionId, slot: u32 }, /// Drop a previously created dynamic user table. - DropDynamicTable { conn: usize, slot: u32 }, + DropDynamicTable { conn: SessionId, slot: u32 }, /// Migrate dynamic table schema for a slot. - MigrateDynamicTable { conn: usize, slot: u32 }, + MigrateDynamicTable { conn: SessionId, slot: u32 }, /// Ask the mock commitlog file layer to run a sync attempt. ChaosSync, /// Close and restart the database from durable history. diff --git a/crates/dst/src/workload/module_ops/generation.rs b/crates/dst/src/workload/module_ops/generation.rs index 3467ab60503..119922dca9e 100644 --- a/crates/dst/src/workload/module_ops/generation.rs +++ b/crates/dst/src/workload/module_ops/generation.rs @@ -1,4 +1,5 @@ use crate::{ + client::SessionId, core::NextInteractionSource, schema::generate_value_for_type, seed::{DstRng, DstSeed}, @@ -70,6 +71,7 @@ impl ModuleWorkloadSource { args.push(generate_value_for_type(&mut self.rng, ty, arg_index)); } Some(ModuleInteraction::CallReducer { + session: SessionId::ZERO, reducer: spec.name.clone(), args, }) diff --git a/crates/dst/src/workload/module_ops/types.rs b/crates/dst/src/workload/module_ops/types.rs index 9d57f185c1e..77d063a3b2a 100644 --- a/crates/dst/src/workload/module_ops/types.rs +++ b/crates/dst/src/workload/module_ops/types.rs @@ -1,5 +1,7 @@ use spacetimedb_sats::AlgebraicType; +use crate::client::SessionId; + /// Single v1 scenario for standalone host target. 
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub enum HostScenarioId { @@ -18,6 +20,7 @@ pub struct ModuleReducerSpec { #[derive(Clone, Debug, Eq, PartialEq)] pub enum ModuleInteraction { CallReducer { + session: SessionId, reducer: String, args: Vec, }, diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index 085279b0871..792fdf2ba13 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -1,6 +1,7 @@ use std::collections::VecDeque; use crate::{ + client::SessionId, core::NextInteractionSource, schema::{ColumnPlan, SchemaPlan, TablePlan}, seed::{DstRng, DstSeed}, @@ -69,11 +70,11 @@ impl<'a> ScenarioPlanner<'a> { self.model.connections.len() } - pub fn active_writer(&self) -> Option { + pub fn active_writer(&self) -> Option { self.model.active_writer() } - pub fn has_read_tx(&self, conn: usize) -> bool { + pub fn has_read_tx(&self, conn: SessionId) -> bool { self.model.has_read_tx(conn) } @@ -81,23 +82,23 @@ impl<'a> ScenarioPlanner<'a> { self.model.any_read_tx() } - pub fn begin_read_tx(&mut self, conn: usize) { + pub fn begin_read_tx(&mut self, conn: SessionId) { self.model.begin_read_tx(conn); } - pub fn release_read_tx(&mut self, conn: usize) { + pub fn release_read_tx(&mut self, conn: SessionId) { self.model.release_read_tx(conn); } - pub fn begin_tx(&mut self, conn: usize) { + pub fn begin_tx(&mut self, conn: SessionId) { self.model.begin_tx(conn); } - pub fn commit_tx(&mut self, conn: usize) { + pub fn commit_tx(&mut self, conn: SessionId) { self.model.commit(conn); } - pub fn rollback_tx(&mut self, conn: usize) { + pub fn rollback_tx(&mut self, conn: SessionId) { self.model.rollback(conn); } @@ -105,7 +106,13 @@ impl<'a> ScenarioPlanner<'a> { /// /// The shared generator owns transaction lifecycle so scenario code can /// focus on domain operations like inserts, deletes, and range checks. 
- pub fn maybe_control_tx(&mut self, conn: usize, begin_pct: usize, commit_pct: usize, rollback_pct: usize) -> bool { + pub fn maybe_control_tx( + &mut self, + conn: SessionId, + begin_pct: usize, + commit_pct: usize, + rollback_pct: usize, + ) -> bool { match (TxControlChoice { begin_pct, commit_pct, @@ -114,7 +121,7 @@ impl<'a> ScenarioPlanner<'a> { .sample(self.rng) { TxControlAction::Begin - if !self.model.connections[conn].in_tx + if !self.model.connections[conn.as_index()].in_tx && !self.model.has_read_tx(conn) && self.model.active_writer().is_none() => { @@ -122,12 +129,12 @@ impl<'a> ScenarioPlanner<'a> { self.pending.push_back(TableWorkloadInteraction::begin_tx(conn)); true } - TxControlAction::Commit if self.model.connections[conn].in_tx => { + TxControlAction::Commit if self.model.connections[conn.as_index()].in_tx => { self.model.commit(conn); self.pending.push_back(TableWorkloadInteraction::commit_tx(conn)); true } - TxControlAction::Rollback if self.model.connections[conn].in_tx => { + TxControlAction::Rollback if self.model.connections[conn.as_index()].in_tx => { self.model.rollback(conn); self.pending.push_back(TableWorkloadInteraction::rollback_tx(conn)); true @@ -136,7 +143,7 @@ impl<'a> ScenarioPlanner<'a> { } } - pub fn visible_rows(&self, conn: usize, table: usize) -> Vec { + pub fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { self.model.visible_rows(conn, table) } @@ -148,23 +155,23 @@ impl<'a> ScenarioPlanner<'a> { self.model.make_row(self.rng, table) } - pub fn insert(&mut self, conn: usize, table: usize, row: crate::schema::SimRow) { + pub fn insert(&mut self, conn: SessionId, table: usize, row: crate::schema::SimRow) { self.model.insert(conn, table, row); } - pub fn batch_insert(&mut self, conn: usize, table: usize, rows: &[crate::schema::SimRow]) { + pub fn batch_insert(&mut self, conn: SessionId, table: usize, rows: &[crate::schema::SimRow]) { self.model.batch_insert(conn, table, rows); } - pub fn delete(&mut self, conn: 
usize, table: usize, row: crate::schema::SimRow) { + pub fn delete(&mut self, conn: SessionId, table: usize, row: crate::schema::SimRow) { self.model.delete(conn, table, row); } - pub fn batch_delete(&mut self, conn: usize, table: usize, rows: &[crate::schema::SimRow]) { + pub fn batch_delete(&mut self, conn: SessionId, table: usize, rows: &[crate::schema::SimRow]) { self.model.batch_delete(conn, table, rows); } - pub fn reinsert(&mut self, conn: usize, table: usize, row: crate::schema::SimRow) { + pub fn reinsert(&mut self, conn: SessionId, table: usize, row: crate::schema::SimRow) { self.model.delete(conn, table, row.clone()); self.model.insert(conn, table, row); } @@ -177,7 +184,7 @@ impl<'a> ScenarioPlanner<'a> { self.model.add_index(table, cols); } - pub fn absent_row(&mut self, conn: usize, table: usize) -> crate::schema::SimRow { + pub fn absent_row(&mut self, conn: SessionId, table: usize) -> crate::schema::SimRow { self.model.absent_row(self.rng, conn, table) } @@ -228,9 +235,9 @@ impl TableWorkloadSource { // Once the workload budget is spent, stop asking the scenario for // more work and only flush any open transaction state. while self.finalize_conn < self.num_connections { - let conn = self.finalize_conn; + let conn = SessionId::from_index(self.finalize_conn); self.finalize_conn += 1; - if self.model.connections[conn].in_tx { + if self.model.connections[conn.as_index()].in_tx { self.model.commit(conn); self.pending.push_back(TableWorkloadInteraction::commit_tx(conn)); return; @@ -250,7 +257,10 @@ impl TableWorkloadSource { // rolls back. Otherwise pick a fresh connection uniformly. 
let conn = if let Some(active_writer) = self.model.active_writer() { active_writer - } else if let Some(read_conn) = (0..self.num_connections).find(|&conn| self.model.has_read_tx(conn)) { + } else if let Some(read_conn) = (0..self.num_connections) + .map(SessionId::from_index) + .find(|&conn| self.model.has_read_tx(conn)) + { // The current RelationalDB target can block when a write transaction // starts behind an open read transaction. Keep driving the snapshot // holder until it releases; interleaved read/write snapshots should diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs index 4abfefcf24e..ab78106f019 100644 --- a/crates/dst/src/workload/table_ops/model.rs +++ b/crates/dst/src/workload/table_ops/model.rs @@ -3,6 +3,7 @@ use std::ops::Bound; use spacetimedb_sats::AlgebraicValue; use crate::{ + client::SessionId, schema::{distinct_value_for_type, generate_value_for_type, ColumnPlan, SchemaPlan, SimRow}, seed::{DstRng, DstSeed}, }; @@ -20,7 +21,7 @@ pub(crate) struct GenerationModel { pub(crate) connections: Vec, committed: Vec>, next_ids: Vec, - active_writer: Option, + active_writer: Option, } #[derive(Clone, Debug, Default)] @@ -55,12 +56,13 @@ impl GenerationModel { SimRow { values } } - pub(crate) fn visible_rows(&self, conn: usize, table: usize) -> Vec { - if let Some(snapshot) = &self.connections[conn].read_snapshot { + pub(crate) fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { + let conn_idx = conn.as_index(); + if let Some(snapshot) = &self.connections[conn_idx].read_snapshot { return snapshot[table].clone(); } let mut rows = self.committed[table].clone(); - let pending = &self.connections[conn]; + let pending = &self.connections[conn_idx]; for (pending_table, row) in &pending.staged_deletes { if *pending_table == table { rows.retain(|candidate| candidate != row); @@ -74,7 +76,7 @@ impl GenerationModel { rows } - pub(crate) fn absent_row(&mut self, rng: &mut DstRng, conn: usize, table: 
usize) -> SimRow { + pub(crate) fn absent_row(&mut self, rng: &mut DstRng, conn: SessionId, table: usize) -> SimRow { let mut row = self.make_row(rng, table); while self.visible_rows(conn, table).iter().any(|candidate| candidate == &row) { row = self.make_row(rng, table); @@ -95,12 +97,12 @@ impl GenerationModel { Some(row) } - pub(crate) fn active_writer(&self) -> Option { + pub(crate) fn active_writer(&self) -> Option { self.active_writer } - pub(crate) fn has_read_tx(&self, conn: usize) -> bool { - self.connections[conn].read_snapshot.is_some() + pub(crate) fn has_read_tx(&self, conn: SessionId) -> bool { + self.connections[conn.as_index()].read_snapshot.is_some() } pub(crate) fn any_read_tx(&self) -> bool { @@ -109,8 +111,8 @@ impl GenerationModel { .any(|connection| connection.read_snapshot.is_some()) } - pub(crate) fn begin_read_tx(&mut self, conn: usize) { - let pending = &mut self.connections[conn]; + pub(crate) fn begin_read_tx(&mut self, conn: SessionId) { + let pending = &mut self.connections[conn.as_index()]; assert!(!pending.in_tx, "connection already has write transaction"); assert!( pending.read_snapshot.is_none(), @@ -119,16 +121,16 @@ impl GenerationModel { pending.read_snapshot = Some(self.committed.clone()); } - pub(crate) fn release_read_tx(&mut self, conn: usize) { + pub(crate) fn release_read_tx(&mut self, conn: SessionId) { assert!( - self.connections[conn].read_snapshot.take().is_some(), + self.connections[conn.as_index()].read_snapshot.take().is_some(), "connection has no read transaction" ); } - pub(crate) fn begin_tx(&mut self, conn: usize) { + pub(crate) fn begin_tx(&mut self, conn: SessionId) { assert!(self.active_writer.is_none(), "single writer already active"); - let pending = &mut self.connections[conn]; + let pending = &mut self.connections[conn.as_index()]; assert!(!pending.in_tx, "connection already in transaction"); assert!( pending.read_snapshot.is_none(), @@ -138,8 +140,8 @@ impl GenerationModel { self.active_writer = 
Some(conn); } - pub(crate) fn insert(&mut self, conn: usize, table: usize, row: SimRow) { - let pending = &mut self.connections[conn]; + pub(crate) fn insert(&mut self, conn: SessionId, table: usize, row: SimRow) { + let pending = &mut self.connections[conn.as_index()]; if pending.in_tx { pending.staged_inserts.push((table, row)); } else { @@ -147,14 +149,14 @@ impl GenerationModel { } } - pub(crate) fn batch_insert(&mut self, conn: usize, table: usize, rows: &[SimRow]) { + pub(crate) fn batch_insert(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { for row in rows { self.insert(conn, table, row.clone()); } } - pub(crate) fn delete(&mut self, conn: usize, table: usize, row: SimRow) { - let pending = &mut self.connections[conn]; + pub(crate) fn delete(&mut self, conn: SessionId, table: usize, row: SimRow) { + let pending = &mut self.connections[conn.as_index()]; if pending.in_tx { pending .staged_inserts @@ -165,14 +167,14 @@ impl GenerationModel { } } - pub(crate) fn batch_delete(&mut self, conn: usize, table: usize, rows: &[SimRow]) { + pub(crate) fn batch_delete(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { for row in rows { self.delete(conn, table, row.clone()); } } - pub(crate) fn commit(&mut self, conn: usize) { - let pending = &mut self.connections[conn]; + pub(crate) fn commit(&mut self, conn: SessionId) { + let pending = &mut self.connections[conn.as_index()]; let inserts = std::mem::take(&mut pending.staged_inserts); let deletes = std::mem::take(&mut pending.staged_deletes); pending.in_tx = false; @@ -186,8 +188,8 @@ impl GenerationModel { } } - pub(crate) fn rollback(&mut self, conn: usize) { - let pending = &mut self.connections[conn]; + pub(crate) fn rollback(&mut self, conn: SessionId) { + let pending = &mut self.connections[conn.as_index()]; pending.staged_inserts.clear(); pending.staged_deletes.clear(); pending.in_tx = false; @@ -234,7 +236,7 @@ impl GenerationModel { pub struct ExpectedModel { committed: Vec>, connections: 
Vec, - active_writer: Option, + active_writer: Option, } #[derive(Clone, Debug, Default)] @@ -264,24 +266,24 @@ impl ExpectedModel { self.active_writer.is_none(), "multiple concurrent writers in expected model" ); - self.connections[*conn].in_tx = true; + self.connections[conn.as_index()].in_tx = true; self.active_writer = Some(*conn); } TableOperation::BeginReadTx { conn } => { - let state = &mut self.connections[*conn]; + let state = &mut self.connections[conn.as_index()]; assert!(!state.in_tx, "read tx started while write tx is open"); assert!(state.read_snapshot.is_none(), "nested read tx in expected model"); state.read_snapshot = Some(self.committed.clone()); } TableOperation::ReleaseReadTx { conn } => { assert!( - self.connections[*conn].read_snapshot.take().is_some(), + self.connections[conn.as_index()].read_snapshot.take().is_some(), "release read tx without open read tx" ); } TableOperation::CommitTx { conn } => { assert_eq!(self.active_writer, Some(*conn), "commit by non-owner in expected model"); - let state = &mut self.connections[*conn]; + let state = &mut self.connections[conn.as_index()]; for (table, row) in state.staged_deletes.drain(..) 
{ self.committed[table].retain(|candidate| *candidate != row); } @@ -297,7 +299,7 @@ impl ExpectedModel { Some(*conn), "rollback by non-owner in expected model" ); - let state = &mut self.connections[*conn]; + let state = &mut self.connections[conn.as_index()]; state.staged_inserts.clear(); state.staged_deletes.clear(); state.in_tx = false; @@ -344,12 +346,13 @@ impl ExpectedModel { } } - pub fn visible_rows(&self, conn: usize, table: usize) -> Vec { - if let Some(snapshot) = &self.connections[conn].read_snapshot { + pub fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { + let conn_idx = conn.as_index(); + if let Some(snapshot) = &self.connections[conn_idx].read_snapshot { return snapshot[table].clone(); } let mut rows = self.committed[table].clone(); - let pending = &self.connections[conn]; + let pending = &self.connections[conn_idx]; for (pending_table, row) in &pending.staged_deletes { if *pending_table == table { rows.retain(|candidate| candidate != row); @@ -363,13 +366,13 @@ impl ExpectedModel { rows } - pub fn lookup_by_id(&self, conn: usize, table: usize, id: u64) -> Option { + pub fn lookup_by_id(&self, conn: SessionId, table: usize, id: u64) -> Option { self.visible_rows(conn, table) .into_iter() .find(|row| row.id() == Some(id)) } - pub fn predicate_count(&self, conn: usize, table: usize, col: u16, value: &AlgebraicValue) -> usize { + pub fn predicate_count(&self, conn: SessionId, table: usize, col: u16, value: &AlgebraicValue) -> usize { self.visible_rows(conn, table) .into_iter() .filter(|row| row.values.get(col as usize) == Some(value)) @@ -378,7 +381,7 @@ impl ExpectedModel { pub fn range_scan( &self, - conn: usize, + conn: SessionId, table: usize, cols: &[u16], lower: &Bound, @@ -408,8 +411,8 @@ impl ExpectedModel { self.committed } - fn insert(&mut self, conn: usize, table: usize, row: SimRow) { - let state = &mut self.connections[conn]; + fn insert(&mut self, conn: SessionId, table: usize, row: SimRow) { + let state = &mut 
self.connections[conn.as_index()]; if state.in_tx { state.staged_inserts.push((table, row)); } else { @@ -417,8 +420,8 @@ impl ExpectedModel { } } - fn delete(&mut self, conn: usize, table: usize, row: SimRow) { - let state = &mut self.connections[conn]; + fn delete(&mut self, conn: SessionId, table: usize, row: SimRow) { + let state = &mut self.connections[conn.as_index()]; if state.in_tx { state .staged_inserts diff --git a/crates/dst/src/workload/table_ops/scenarios/banking.rs b/crates/dst/src/workload/table_ops/scenarios/banking.rs index 31f56735110..1a4db5febbb 100644 --- a/crates/dst/src/workload/table_ops/scenarios/banking.rs +++ b/crates/dst/src/workload/table_ops/scenarios/banking.rs @@ -1,6 +1,9 @@ use spacetimedb_sats::AlgebraicType; -use crate::schema::{ColumnPlan, SchemaPlan, TablePlan}; +use crate::{ + client::SessionId, + schema::{ColumnPlan, SchemaPlan, TablePlan}, +}; use super::super::{generation::ScenarioPlanner, TableWorkloadInteraction, TableWorkloadOutcome}; @@ -66,7 +69,7 @@ pub fn validate_outcome(schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> Ok(()) } -pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: usize) { +pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { if planner.maybe_control_tx(conn, 25, 20, 10) { return; } diff --git a/crates/dst/src/workload/table_ops/scenarios/mod.rs b/crates/dst/src/workload/table_ops/scenarios/mod.rs index e6a95cc1c4c..ac024a87655 100644 --- a/crates/dst/src/workload/table_ops/scenarios/mod.rs +++ b/crates/dst/src/workload/table_ops/scenarios/mod.rs @@ -1,7 +1,7 @@ mod banking; mod random_crud; -use crate::{schema::SchemaPlan, seed::DstRng}; +use crate::{client::SessionId, schema::SchemaPlan, seed::DstRng}; use super::{generation::ScenarioPlanner, TableScenario, TableWorkloadOutcome}; @@ -31,7 +31,7 @@ impl TableScenario for RandomCrudScenario { random_crud::validate_outcome(schema, outcome) } - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) 
{ + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { random_crud::fill_pending(planner, conn); } } @@ -45,7 +45,7 @@ impl TableScenario for BankingScenario { banking::validate_outcome(schema, outcome) } - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { banking::fill_pending(planner, conn); } } @@ -59,7 +59,7 @@ impl TableScenario for IndexedRangesScenario { random_crud::validate_outcome(schema, outcome) } - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { random_crud::fill_pending_indexed_ranges(planner, conn); } } @@ -81,7 +81,7 @@ impl TableScenario for TableScenarioId { } } - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize) { + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { match self { Self::RandomCrud => RandomCrudScenario.fill_pending(planner, conn), Self::IndexedRanges => IndexedRangesScenario.fill_pending(planner, conn), diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs index c801a49f00b..d728c7f1ad9 100644 --- a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -3,6 +3,7 @@ use std::ops::Bound; use spacetimedb_sats::AlgebraicType; use crate::{ + client::SessionId, schema::{default_value_for_type, generate_supported_type, ColumnPlan, SchemaPlan, SimRow, TablePlan}, seed::DstRng, workload::strategy::{Index, Percent, Strategy}, @@ -156,15 +157,15 @@ pub fn validate_outcome(_schema: &SchemaPlan, _outcome: &TableWorkloadOutcome) - Ok(()) } -pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: usize) { +pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { fill_pending_with_profile(planner, conn, 
RANDOM_CRUD_PROFILE); } -pub fn fill_pending_indexed_ranges(planner: &mut ScenarioPlanner<'_>, conn: usize) { +pub fn fill_pending_indexed_ranges(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { fill_pending_with_profile(planner, conn, INDEXED_RANGES_PROFILE); } -fn fill_pending_with_profile(planner: &mut ScenarioPlanner<'_>, conn: usize, profile: TableWorkloadProfile) { +fn fill_pending_with_profile(planner: &mut ScenarioPlanner<'_>, conn: SessionId, profile: TableWorkloadProfile) { if planner.has_read_tx(conn) { let table = planner.choose_table(); let visible_rows = planner.visible_rows(conn, table); @@ -296,8 +297,9 @@ fn fill_pending_with_profile(planner: &mut ScenarioPlanner<'_>, conn: usize, pro planner.push_interaction(TableWorkloadInteraction::delete(conn, table, row)); } -fn emit_write_conflict(planner: &mut ScenarioPlanner<'_>, owner: usize) -> bool { +fn emit_write_conflict(planner: &mut ScenarioPlanner<'_>, owner: SessionId) -> bool { let candidates = (0..planner.connection_count()) + .map(SessionId::from_index) .filter(|&conn| conn != owner && !planner.has_read_tx(conn)) .collect::>(); if candidates.is_empty() { @@ -315,7 +317,7 @@ fn emit_write_conflict(planner: &mut ScenarioPlanner<'_>, owner: usize) -> bool true } -fn emit_add_column(planner: &mut ScenarioPlanner<'_>, conn: usize, table: usize) -> bool { +fn emit_add_column(planner: &mut ScenarioPlanner<'_>, conn: SessionId, table: usize) -> bool { const MAX_COLUMNS_PER_TABLE: usize = 12; let column_idx = planner.table_plan(table).columns.len(); if column_idx >= MAX_COLUMNS_PER_TABLE { @@ -337,7 +339,7 @@ fn emit_add_column(planner: &mut ScenarioPlanner<'_>, conn: usize, table: usize) true } -fn emit_add_index(planner: &mut ScenarioPlanner<'_>, conn: usize, table: usize, visible_rows: &[SimRow]) -> bool { +fn emit_add_index(planner: &mut ScenarioPlanner<'_>, conn: SessionId, table: usize, visible_rows: &[SimRow]) -> bool { let candidates = candidate_new_indexes(planner, table); if 
candidates.is_empty() { return false; @@ -359,7 +361,7 @@ fn emit_add_index(planner: &mut ScenarioPlanner<'_>, conn: usize, table: usize, fn emit_unique_key_conflict_insert( planner: &mut ScenarioPlanner<'_>, - conn: usize, + conn: SessionId, table: usize, visible_rows: &[SimRow], ) -> bool { @@ -420,7 +422,7 @@ fn inclusive_bounds_for_rows( fn emit_query( planner: &mut ScenarioPlanner<'_>, - conn: usize, + conn: SessionId, table: usize, visible_rows: &[crate::schema::SimRow], ) -> bool { diff --git a/crates/dst/src/workload/table_ops/strategies.rs b/crates/dst/src/workload/table_ops/strategies.rs index 76faacf241e..13d04d2054c 100644 --- a/crates/dst/src/workload/table_ops/strategies.rs +++ b/crates/dst/src/workload/table_ops/strategies.rs @@ -1,19 +1,20 @@ //! Typed strategies specific to table-style workload generation. use crate::{ + client::SessionId, seed::DstRng, workload::strategy::{Index, Strategy, Weighted}, }; -/// Choose one connection uniformly. +/// Choose one logical session uniformly from the current fixed-size session pool. 
#[derive(Clone, Copy, Debug)] pub(crate) struct ConnectionChoice { pub(crate) connection_count: usize, } -impl Strategy for ConnectionChoice { - fn sample(&self, rng: &mut DstRng) -> usize { - Index::new(self.connection_count).sample(rng) +impl Strategy for ConnectionChoice { + fn sample(&self, rng: &mut DstRng) -> SessionId { + SessionId::from_index(Index::new(self.connection_count).sample(rng)) } } diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index 40e427f465f..ac5b35f1fa6 100644 --- a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -3,6 +3,7 @@ use std::ops::Bound; use spacetimedb_sats::AlgebraicValue; use crate::{ + client::SessionId, schema::{ColumnPlan, SchemaPlan, SimRow}, seed::DstRng, }; @@ -16,7 +17,7 @@ use super::generation::ScenarioPlanner; pub(crate) trait TableScenario: Clone { fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan; fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()>; - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: usize); + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId); } /// One generated workload step. @@ -31,81 +32,85 @@ pub type TableWorkloadInteraction = PlannedInteraction; #[derive(Clone, Debug, Eq, PartialEq)] pub enum TableOperation { /// Start an explicit write transaction on a connection. - BeginTx { conn: usize }, + BeginTx { conn: SessionId }, /// Commit the connection's explicit write transaction. - CommitTx { conn: usize }, + CommitTx { conn: SessionId }, /// Roll back the connection's explicit write transaction. - RollbackTx { conn: usize }, + RollbackTx { conn: SessionId }, /// Hold a read snapshot open while later reads observe stable state. - BeginReadTx { conn: usize }, + BeginReadTx { conn: SessionId }, /// Release a previously opened read snapshot. 
- ReleaseReadTx { conn: usize }, + ReleaseReadTx { conn: SessionId }, /// Attempt to start a second writer while another connection owns the write lock. - BeginTxConflict { owner: usize, conn: usize }, + BeginTxConflict { owner: SessionId, conn: SessionId }, /// Attempt an auto-commit write while another connection owns the write lock. WriteConflictInsert { - owner: usize, - conn: usize, + owner: SessionId, + conn: SessionId, table: usize, row: SimRow, }, /// Insert a new row with a fresh primary id. - Insert { conn: usize, table: usize, row: SimRow }, + Insert { conn: SessionId, table: usize, row: SimRow }, /// Delete an existing visible row. - Delete { conn: usize, table: usize, row: SimRow }, + Delete { conn: SessionId, table: usize, row: SimRow }, /// Reinsert an exact row that is already visible. /// /// RelationalDB has set semantics for identical rows, so this should be an /// idempotent no-op rather than a unique-key error. - ExactDuplicateInsert { conn: usize, table: usize, row: SimRow }, + ExactDuplicateInsert { conn: SessionId, table: usize, row: SimRow }, /// Insert a row with an existing primary id but different non-key payload. /// /// This is the operation that should fail with `UniqueConstraintViolation`. - UniqueKeyConflictInsert { conn: usize, table: usize, row: SimRow }, + UniqueKeyConflictInsert { conn: SessionId, table: usize, row: SimRow }, /// Delete a row that is absent from the visible state. - DeleteMissing { conn: usize, table: usize, row: SimRow }, + DeleteMissing { conn: SessionId, table: usize, row: SimRow }, /// Insert several fresh rows in one interaction. BatchInsert { - conn: usize, + conn: SessionId, table: usize, rows: Vec, }, /// Delete several visible rows in one interaction. BatchDelete { - conn: usize, + conn: SessionId, table: usize, rows: Vec, }, /// Delete and insert the same row, stressing delete/insert ordering. 
- Reinsert { conn: usize, table: usize, row: SimRow }, + Reinsert { conn: SessionId, table: usize, row: SimRow }, /// Add a column to an existing table with a default for live rows. AddColumn { - conn: usize, + conn: SessionId, table: usize, column: ColumnPlan, default: AlgebraicValue, }, /// Add a non-primary index after data exists. - AddIndex { conn: usize, table: usize, cols: Vec }, + AddIndex { + conn: SessionId, + table: usize, + cols: Vec, + }, /// Query a row by primary id and compare against the model. - PointLookup { conn: usize, table: usize, id: u64 }, + PointLookup { conn: SessionId, table: usize, id: u64 }, /// Count rows by equality on one column and compare against the model. PredicateCount { - conn: usize, + conn: SessionId, table: usize, col: u16, value: AlgebraicValue, }, /// Scan an indexed range and compare against model filtering. RangeScan { - conn: usize, + conn: SessionId, table: usize, cols: Vec, lower: Bound, upper: Bound, }, /// Scan all visible rows and compare against the model. 
- FullScan { conn: usize, table: usize }, + FullScan { conn: SessionId, table: usize }, } #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -136,34 +141,34 @@ impl PlannedInteraction { } } - pub fn begin_tx(conn: usize) -> Self { + pub fn begin_tx(conn: SessionId) -> Self { Self::ok(TableOperation::BeginTx { conn }) } - pub fn commit_tx(conn: usize) -> Self { + pub fn commit_tx(conn: SessionId) -> Self { Self::ok(TableOperation::CommitTx { conn }) } - pub fn rollback_tx(conn: usize) -> Self { + pub fn rollback_tx(conn: SessionId) -> Self { Self::ok(TableOperation::RollbackTx { conn }) } - pub fn begin_read_tx(conn: usize) -> Self { + pub fn begin_read_tx(conn: SessionId) -> Self { Self::ok(TableOperation::BeginReadTx { conn }) } - pub fn release_read_tx(conn: usize) -> Self { + pub fn release_read_tx(conn: SessionId) -> Self { Self::ok(TableOperation::ReleaseReadTx { conn }) } - pub fn begin_tx_conflict(owner: usize, conn: usize) -> Self { + pub fn begin_tx_conflict(owner: SessionId, conn: SessionId) -> Self { Self::expected_err( TableOperation::BeginTxConflict { owner, conn }, ExpectedErrorKind::WriteConflict, ) } - pub fn write_conflict_insert(owner: usize, conn: usize, table: usize, row: SimRow) -> Self { + pub fn write_conflict_insert(owner: SessionId, conn: SessionId, table: usize, row: SimRow) -> Self { Self::expected_err( TableOperation::WriteConflictInsert { owner, @@ -175,45 +180,45 @@ impl PlannedInteraction { ) } - pub fn insert(conn: usize, table: usize, row: SimRow) -> Self { + pub fn insert(conn: SessionId, table: usize, row: SimRow) -> Self { Self::ok(TableOperation::Insert { conn, table, row }) } - pub fn delete(conn: usize, table: usize, row: SimRow) -> Self { + pub fn delete(conn: SessionId, table: usize, row: SimRow) -> Self { Self::ok(TableOperation::Delete { conn, table, row }) } - pub fn exact_duplicate_insert(conn: usize, table: usize, row: SimRow) -> Self { + pub fn exact_duplicate_insert(conn: SessionId, table: usize, row: SimRow) -> Self { 
Self::ok(TableOperation::ExactDuplicateInsert { conn, table, row }) } - pub fn unique_key_conflict_insert(conn: usize, table: usize, row: SimRow) -> Self { + pub fn unique_key_conflict_insert(conn: SessionId, table: usize, row: SimRow) -> Self { Self::expected_err( TableOperation::UniqueKeyConflictInsert { conn, table, row }, ExpectedErrorKind::UniqueConstraintViolation, ) } - pub fn delete_missing(conn: usize, table: usize, row: SimRow) -> Self { + pub fn delete_missing(conn: SessionId, table: usize, row: SimRow) -> Self { Self::expected_err( TableOperation::DeleteMissing { conn, table, row }, ExpectedErrorKind::MissingRow, ) } - pub fn batch_insert(conn: usize, table: usize, rows: Vec) -> Self { + pub fn batch_insert(conn: SessionId, table: usize, rows: Vec) -> Self { Self::ok(TableOperation::BatchInsert { conn, table, rows }) } - pub fn batch_delete(conn: usize, table: usize, rows: Vec) -> Self { + pub fn batch_delete(conn: SessionId, table: usize, rows: Vec) -> Self { Self::ok(TableOperation::BatchDelete { conn, table, rows }) } - pub fn reinsert(conn: usize, table: usize, row: SimRow) -> Self { + pub fn reinsert(conn: SessionId, table: usize, row: SimRow) -> Self { Self::ok(TableOperation::Reinsert { conn, table, row }) } - pub fn add_column(conn: usize, table: usize, column: ColumnPlan, default: AlgebraicValue) -> Self { + pub fn add_column(conn: SessionId, table: usize, column: ColumnPlan, default: AlgebraicValue) -> Self { Self::ok(TableOperation::AddColumn { conn, table, @@ -222,15 +227,15 @@ impl PlannedInteraction { }) } - pub fn add_index(conn: usize, table: usize, cols: Vec) -> Self { + pub fn add_index(conn: SessionId, table: usize, cols: Vec) -> Self { Self::ok(TableOperation::AddIndex { conn, table, cols }) } - pub fn point_lookup(conn: usize, table: usize, id: u64) -> Self { + pub fn point_lookup(conn: SessionId, table: usize, id: u64) -> Self { Self::ok(TableOperation::PointLookup { conn, table, id }) } - pub fn predicate_count(conn: usize, table: 
usize, col: u16, value: AlgebraicValue) -> Self { + pub fn predicate_count(conn: SessionId, table: usize, col: u16, value: AlgebraicValue) -> Self { Self::ok(TableOperation::PredicateCount { conn, table, @@ -240,7 +245,7 @@ impl PlannedInteraction { } pub fn range_scan( - conn: usize, + conn: SessionId, table: usize, cols: Vec, lower: Bound, @@ -255,7 +260,7 @@ impl PlannedInteraction { }) } - pub fn full_scan(conn: usize, table: usize) -> Self { + pub fn full_scan(conn: SessionId, table: usize) -> Self { Self::ok(TableOperation::FullScan { conn, table }) } } @@ -269,12 +274,12 @@ pub struct TableWorkloadOutcome { pub final_rows: Vec>, } -/// Per-connection write transaction bookkeeping shared by locking targets. +/// Per-session write transaction bookkeeping shared by locking targets. pub(crate) struct ConnectionWriteState { - /// Open mutable transaction handle for each simulated connection. + /// Open mutable transaction handle for each simulated session. pub tx_by_connection: Vec>, - /// Connection that currently owns the single-writer lock, if any. - pub active_writer: Option, + /// Session that currently owns the single-writer lock, if any. 
+ pub active_writer: Option, } impl ConnectionWriteState { @@ -285,14 +290,14 @@ impl ConnectionWriteState { } } - pub fn ensure_known_connection(&self, conn: usize) -> Result<(), String> { + pub fn ensure_known_connection(&self, conn: SessionId) -> Result<(), String> { self.tx_by_connection - .get(conn) + .get(conn.as_index()) .map(|_| ()) .ok_or_else(|| format!("connection {conn} out of range")) } - pub fn ensure_writer_owner(&self, conn: usize, action: &str) -> Result<(), String> { + pub fn ensure_writer_owner(&self, conn: SessionId, action: &str) -> Result<(), String> { self.ensure_known_connection(conn)?; match self.active_writer { Some(owner) if owner == conn => Ok(()), From b0490af784db5f2618a86d9dde34b5aa2aef2d95 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 4 May 2026 20:29:55 +0530 Subject: [PATCH 25/74] split properties --- Cargo.lock | 10 +- Cargo.toml | 4 +- crates/core/build.rs | 10 + crates/core/src/util/jobs.rs | 8 +- .../src/locking_tx_datastore/datastore.rs | 26 - .../src/locking_tx_datastore/lock_trace.rs | 57 - .../datastore/src/locking_tx_datastore/mod.rs | 2 - crates/dst/Cargo.toml | 4 +- crates/dst/README.md | 16 +- crates/dst/build.rs | 10 + crates/dst/src/core/mod.rs | 6 +- crates/dst/src/lib.rs | 7 + crates/dst/src/main.rs | 4 +- crates/dst/src/properties.rs | 1088 +---------------- crates/dst/src/properties/rules.rs | 496 ++++++++ crates/dst/src/properties/runtime.rs | 512 ++++++++ crates/dst/src/targets/buggified_repo.rs | 8 +- .../src/targets/relational_db_commitlog.rs | 8 +- crates/dst/tests/madsim_tcp.rs | 2 +- crates/io/build.rs | 10 + crates/io/src/lib.rs | 27 +- crates/standalone/build.rs | 10 + crates/standalone/src/subcommands/start.rs | 35 +- run_dst.sh | 3 + 24 files changed, 1183 insertions(+), 1180 deletions(-) create mode 100644 crates/core/build.rs delete mode 100644 crates/datastore/src/locking_tx_datastore/lock_trace.rs create mode 100644 crates/dst/build.rs create mode 100644 crates/dst/src/properties/rules.rs 
create mode 100644 crates/dst/src/properties/runtime.rs create mode 100644 crates/io/build.rs create mode 100644 crates/standalone/build.rs diff --git a/Cargo.lock b/Cargo.lock index 84f7261859f..5cf1422dccb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8307,7 +8307,7 @@ dependencies = [ [[package]] name = "spacetimedb-dst" -version = "2.1.0" +version = "2.2.0" dependencies = [ "anyhow", "bytes", @@ -8323,10 +8323,10 @@ dependencies = [ "spacetimedb-datastore", "spacetimedb-durability", "spacetimedb-execution", - "spacetimedb-lib 2.1.0", + "spacetimedb-lib 2.2.0", "spacetimedb-paths", - "spacetimedb-primitives 2.1.0", - "spacetimedb-sats 2.1.0", + "spacetimedb-primitives 2.2.0", + "spacetimedb-sats 2.2.0", "spacetimedb-schema", "spacetimedb-standalone", "spacetimedb-table", @@ -8416,7 +8416,7 @@ dependencies = [ [[package]] name = "spacetimedb-io" -version = "2.1.0" +version = "2.2.0" dependencies = [ "madsim-tokio", ] diff --git a/Cargo.toml b/Cargo.toml index a7043e48664..baa1e9f3b8b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -132,7 +132,7 @@ spacetimedb-durability = { path = "crates/durability", version = "=2.2.0" } spacetimedb-execution = { path = "crates/execution", version = "=2.2.0" } spacetimedb-expr = { path = "crates/expr", version = "=2.2.0" } spacetimedb-guard = { path = "crates/guard", version = "=2.2.0" } -spacetimedb-io = { path = "crates/io", version = "=2.1.0" } +spacetimedb-io = { path = "crates/io", version = "=2.2.0" } spacetimedb-lib = { path = "crates/lib", default-features = false, version = "=2.2.0" } spacetimedb-memory-usage = { path = "crates/memory-usage", version = "=2.2.0", default-features = false } spacetimedb-metrics = { path = "crates/metrics", version = "=2.2.0" } @@ -392,7 +392,7 @@ features = [ ] [workspace.lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)', 'cfg(madsim)'] } +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)', 'cfg(madsim)', 'cfg(simulation)'] } 
[workspace.lints.clippy] # FIXME: we should work on this lint incrementally diff --git a/crates/core/build.rs b/crates/core/build.rs new file mode 100644 index 00000000000..3982c077afc --- /dev/null +++ b/crates/core/build.rs @@ -0,0 +1,10 @@ +fn main() { + println!("cargo:rerun-if-env-changed=CARGO_CFG_MADSIM"); + println!("cargo:rerun-if-env-changed=CARGO_CFG_SIMULATION"); + println!("cargo:rerun-if-env-changed=CARGO_ENCODED_RUSTFLAGS"); + println!("cargo:rerun-if-env-changed=RUSTFLAGS"); + + if std::env::var_os("CARGO_CFG_MADSIM").is_some() { + println!("cargo:rustc-cfg=simulation"); + } +} diff --git a/crates/core/src/util/jobs.rs b/crates/core/src/util/jobs.rs index 2ac4122d3ab..86d8be6ad2b 100644 --- a/crates/core/src/util/jobs.rs +++ b/crates/core/src/util/jobs.rs @@ -7,7 +7,7 @@ use futures::FutureExt; use indexmap::IndexMap; use smallvec::SmallVec; use spacetimedb_data_structures::map::HashMap; -#[cfg(not(madsim))] +#[cfg(not(simulation))] use tokio::runtime; use tokio::sync::{mpsc, oneshot, watch}; use tracing::Instrument; @@ -290,14 +290,14 @@ pub struct SingleCoreExecutor { struct SingleCoreExecutorInner { /// The sending end of a channel over which we send jobs. job_tx: mpsc::UnboundedSender LocalBoxFuture<'static, ()> + Send>>, - #[cfg(madsim)] + #[cfg(simulation)] /// Retains the allocation guard for the lifetime of the simulated executor. _guard: LoadBalanceOnDropGuard, } impl SingleCoreExecutor { /// Spawn a `SingleCoreExecutor` on the given core. - #[cfg(not(madsim))] + #[cfg(not(simulation))] fn spawn(core: AllocatedJobCore) -> Self { let AllocatedJobCore { guard, mut pinner } = core; @@ -337,7 +337,7 @@ impl SingleCoreExecutor { /// In simulation, job execution models the same logical single-core queue /// without creating an OS thread or re-entering a Tokio runtime with /// `Handle::block_on`. 
- #[cfg(madsim)] + #[cfg(simulation)] fn spawn(core: AllocatedJobCore) -> Self { let AllocatedJobCore { guard, pinner: _ } = core; diff --git a/crates/datastore/src/locking_tx_datastore/datastore.rs b/crates/datastore/src/locking_tx_datastore/datastore.rs index f4728f5165f..edcce91ce5e 100644 --- a/crates/datastore/src/locking_tx_datastore/datastore.rs +++ b/crates/datastore/src/locking_tx_datastore/datastore.rs @@ -1,5 +1,3 @@ -#[cfg(any(test, feature = "test"))] -use super::lock_trace::{self, LockEvent, LockEventKind}; use super::{ committed_state::CommittedState, mut_tx::MutTxId, sequence::SequencesState, state_view::StateView, tx::TxId, tx_state::TxState, @@ -341,15 +339,7 @@ impl Tx for Locking { let ctx = ExecutionContext::with_workload(self.database_identity, workload); let timer = Instant::now(); - #[cfg(any(test, feature = "test"))] - lock_trace::emit(LockEvent { - kind: LockEventKind::BeginReadRequested, - }); let committed_state_shared_lock = self.committed_state.read_arc(); - #[cfg(any(test, feature = "test"))] - lock_trace::emit(LockEvent { - kind: LockEventKind::BeginReadAcquired, - }); let lock_wait_time = timer.elapsed(); Self::Tx { @@ -905,24 +895,8 @@ impl MutTx for Locking { let ctx = ExecutionContext::with_workload(self.database_identity, workload); let timer = Instant::now(); - #[cfg(any(test, feature = "test"))] - lock_trace::emit(LockEvent { - kind: LockEventKind::BeginWriteRequested, - }); let committed_state_write_lock = self.committed_state.write_arc(); - #[cfg(any(test, feature = "test"))] - lock_trace::emit(LockEvent { - kind: LockEventKind::BeginWriteAcquired, - }); - #[cfg(any(test, feature = "test"))] - lock_trace::emit(LockEvent { - kind: LockEventKind::SequenceMutexRequested, - }); let sequence_state_lock = self.sequence_state.lock_arc(); - #[cfg(any(test, feature = "test"))] - lock_trace::emit(LockEvent { - kind: LockEventKind::SequenceMutexAcquired, - }); let lock_wait_time = timer.elapsed(); MutTxId { diff --git 
a/crates/datastore/src/locking_tx_datastore/lock_trace.rs b/crates/datastore/src/locking_tx_datastore/lock_trace.rs deleted file mode 100644 index 38584aa7395..00000000000 --- a/crates/datastore/src/locking_tx_datastore/lock_trace.rs +++ /dev/null @@ -1,57 +0,0 @@ -#[cfg(any(test, feature = "test"))] -use std::sync::{Arc, Mutex, OnceLock}; - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum LockEventKind { - BeginReadRequested, - BeginReadAcquired, - BeginWriteRequested, - BeginWriteAcquired, - SequenceMutexRequested, - SequenceMutexAcquired, -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct LockEvent { - pub kind: LockEventKind, -} - -#[cfg(any(test, feature = "test"))] -type Hook = Arc; - -#[cfg(any(test, feature = "test"))] -fn hook_cell() -> &'static Mutex> { - static CELL: OnceLock>> = OnceLock::new(); - CELL.get_or_init(|| Mutex::new(None)) -} - -#[cfg(any(test, feature = "test"))] -pub struct HookGuard; - -#[cfg(any(test, feature = "test"))] -impl Drop for HookGuard { - fn drop(&mut self) { - *hook_cell().lock().expect("lock hook cell") = None; - } -} - -#[cfg(any(test, feature = "test"))] -pub fn install_lock_event_hook(hook: impl Fn(LockEvent) + Send + Sync + 'static) -> HookGuard { - *hook_cell().lock().expect("lock hook cell") = Some(Arc::new(hook)); - HookGuard -} - -#[cfg(not(any(test, feature = "test")))] -pub struct HookGuard; - -#[cfg(not(any(test, feature = "test")))] -pub fn install_lock_event_hook(_hook: impl Fn(LockEvent) + Send + Sync + 'static) -> HookGuard { - HookGuard -} - -pub(super) fn emit(event: LockEvent) { - #[cfg(any(test, feature = "test"))] - if let Some(hook) = hook_cell().lock().expect("lock hook cell").clone() { - hook(event); - } -} diff --git a/crates/datastore/src/locking_tx_datastore/mod.rs b/crates/datastore/src/locking_tx_datastore/mod.rs index 4c080e3655d..8f77b462bdd 100644 --- a/crates/datastore/src/locking_tx_datastore/mod.rs +++ b/crates/datastore/src/locking_tx_datastore/mod.rs @@ -2,8 +2,6 @@ pub 
mod committed_state; pub mod datastore; -#[cfg(any(test, feature = "test"))] -pub mod lock_trace; mod mut_tx; pub use mut_tx::{FuncCallType, IndexScanPointOrRange, MutTxId, ViewCallInfo}; mod sequence; diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index 9b64572ed26..21ae398c299 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -24,9 +24,9 @@ spacetimedb-cli.workspace = true spacetimedb-client-api.workspace = true spacetimedb-client-api-messages.workspace = true spacetimedb-datastore = { workspace = true, features = ["test"] } -spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.1.0" } +spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.2.0" } spacetimedb-commitlog = { workspace = true, features = ["test"] } -spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.1.0" } +spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.2.0" } spacetimedb-execution.workspace = true spacetimedb-lib.workspace = true spacetimedb-paths.workspace = true diff --git a/crates/dst/README.md b/crates/dst/README.md index f6a8a0cbc8f..e12790703e8 100644 --- a/crates/dst/README.md +++ b/crates/dst/README.md @@ -135,8 +135,8 @@ Current property families include: ## Fault Injection `relational-db-commitlog` can wrap the in-memory commitlog repo in -`BuggifiedRepo`. Fault decisions are deterministic under madsim and summarized -in the final outcome. +`BuggifiedRepo`. Fault decisions are deterministic in simulation runs and +summarized in the final outcome. 
Profiles: @@ -163,7 +163,7 @@ cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario cargo run -p spacetimedb-dst -- run --target standalone-host --scenario host-smoke --max-interactions 100 ``` -madsim run with commitlog faults: +madsim-backed simulation run with commitlog faults: ```bash RUSTFLAGS='--cfg madsim' cargo run -p spacetimedb-dst -- run \ @@ -173,6 +173,12 @@ RUSTFLAGS='--cfg madsim' cargo run -p spacetimedb-dst -- run \ --commitlog-fault-profile default ``` +`--cfg madsim` is still the switch that enables madsim-tokio. Do not pass +`--cfg simulation` directly: that only enables SpacetimeDB's cfg gates and leaves +the madsim dependency in its normal Tokio/std mode. The workspace crates derive +`cfg(simulation)` from `cfg(madsim)` so SpacetimeDB source code does not need +provider-specific cfg gates. + Trace every interaction: ```bash @@ -217,12 +223,12 @@ Start here: - No shrinker yet; seed replay is the current reproduction mechanism. - Sometimes-property reporting is still outcome-counter based, not a stable property-event catalog. -- madsim is used for current deterministic runtime/fault hooks; deeper +- madsim backs the current deterministic runtime/fault hooks; deeper host/network/filesystem simulation still needs explicit runtime and IO boundaries. - The current `RelationalDB` target drives open read snapshots to release before starting writes, because beginning a write behind an open read snapshot can block in this target shape. Interleaved read/write snapshot histories should come back once the target models that lock behavior explicitly. -- Current madsim builds still expose runtime-boundary gaps, including +- Current simulation builds still expose runtime-boundary gaps, including `spawn_blocking` call sites and randomized standard `HashMap` state warnings. 
diff --git a/crates/dst/build.rs b/crates/dst/build.rs new file mode 100644 index 00000000000..3982c077afc --- /dev/null +++ b/crates/dst/build.rs @@ -0,0 +1,10 @@ +fn main() { + println!("cargo:rerun-if-env-changed=CARGO_CFG_MADSIM"); + println!("cargo:rerun-if-env-changed=CARGO_CFG_SIMULATION"); + println!("cargo:rerun-if-env-changed=CARGO_ENCODED_RUSTFLAGS"); + println!("cargo:rerun-if-env-changed=RUSTFLAGS"); + + if std::env::var_os("CARGO_CFG_MADSIM").is_some() { + println!("cargo:rustc-cfg=simulation"); + } +} diff --git a/crates/dst/src/core/mod.rs b/crates/dst/src/core/mod.rs index 69ba1810284..2a781a714c0 100644 --- a/crates/dst/src/core/mod.rs +++ b/crates/dst/src/core/mod.rs @@ -11,7 +11,7 @@ use crate::config::RunConfig; use futures_util::FutureExt; /// Pull-based deterministic interaction source. -pub trait NextInteractionSource { +pub trait WorkloadSource { type Interaction; fn next_interaction(&mut self) -> Option; @@ -50,7 +50,7 @@ pub async fn run_streaming( ) -> anyhow::Result where I: Clone + Debug, - S: NextInteractionSource, + S: WorkloadSource, E: TargetEngine, P: StreamingProperties, { @@ -151,7 +151,7 @@ mod tests { } } - impl NextInteractionSource for SingleStepSource { + impl WorkloadSource for SingleStepSource { type Interaction = TestInteraction; fn next_interaction(&mut self) -> Option { diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index f66a8fd8dc9..c8e29d0a5c2 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -32,6 +32,13 @@ //! 7. Shared randomness, weighting, and sampling helpers belong in the //! workload strategy module, not in ad hoc target or scenario code. +#[cfg(all(simulation, not(madsim)))] +compile_error!( + "cfg(simulation) enables SpacetimeDB simulation gates, but madsim itself \ + still requires cfg(madsim). Use RUSTFLAGS=\"--cfg madsim\" or ./run_dst.sh; \ + SpacetimeDB crates derive cfg(simulation) from cfg(madsim)." 
+); + /// Logical client/session identifiers shared by workloads and targets. pub mod client; /// Shared run-budget configuration for DST targets. diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index 30327001368..527ed27f68d 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -136,7 +136,7 @@ fn run_prepared_target( run_in_runtime(seed, run_target::(seed, scenario, config)) } -#[cfg(madsim)] +#[cfg(all(simulation, madsim))] fn run_in_runtime(seed: DstSeed, future: F) -> anyhow::Result where F: Future>, @@ -146,7 +146,7 @@ where runtime.block_on(future) } -#[cfg(not(madsim))] +#[cfg(not(all(simulation, madsim)))] fn run_in_runtime(_seed: DstSeed, future: F) -> anyhow::Result where F: Future>, diff --git a/crates/dst/src/properties.rs b/crates/dst/src/properties.rs index 6550ef8b87b..0e817894912 100644 --- a/crates/dst/src/properties.rs +++ b/crates/dst/src/properties.rs @@ -5,47 +5,46 @@ //! rules compare those observations against either the target's externally //! visible state, an expected model, or durable replay state. //! -//! ## Property Catalog +//! ## Property Model //! -//! - `InsertSelect`: a row inserted by a session is immediately visible to that -//! same session. -//! - `DeleteSelect`: a row deleted by a session is no longer visible to that -//! same session. -//! - `SelectSelectOptimizer`: a NoREC-style check comparing indexed/filter -//! query results with a direct row projection. -//! - `WhereTrueFalseNull`: a TLP-style partition check for boolean predicates. -//! - `NotCrash`: target interactions, finish, and outcome collection must not -//! panic. This is enforced by the shared streaming runner. -//! - `IndexRangeExcluded`: range scans over composite indexes obey excluded -//! upper bounds. -//! - `BankingTablesMatch`: scenario-level shadow tables stay identical. -//! - `DynamicMigrationAutoInc`: migrated dynamic tables keep advancing integer -//! IDs after schema changes. -//! 
- `DurableReplayMatchesModel`: replayed durable state matches the expected -//! committed model. -//! - `ExpectedErrorMatches`: generated expected failures are the failures the -//! target actually reports. -//! - `PointLookupMatchesModel`, `PredicateCountMatchesModel`, -//! `RangeScanMatchesModel`, and `FullScanMatchesModel`: query observations -//! match the expected visibility model for the acting session. +//! A property is a named check over a run. It observes generated interactions, +//! target observations, target-visible state, expected models, and final +//! outcomes. Failures should include a stable property name and enough context +//! to replay the seed or trace. +//! +//! The current catalog is intentionally small and falls into the same groups +//! used by the proposal: +//! +//! - Safety properties: `NotCrash`, `ExpectedErrorMatches`, +//! `DurableReplayMatchesModel`, `BankingTablesMatch`, and +//! `DynamicMigrationAutoInc`. +//! - Model/oracle properties: `PointLookupMatchesModel`, +//! `PredicateCountMatchesModel`, `RangeScanMatchesModel`, +//! `FullScanMatchesModel`, and the scenario-specific final table-state check. +//! - Differential and metamorphic properties: `InsertSelect`, `DeleteSelect`, +//! `SelectSelectOptimizer`, `WhereTrueFalseNull`, and `IndexRangeExcluded`. +//! - Coverage and progress properties are not first-class rules yet. For now, +//! targets expose operation and outcome counters. Those counters should become +//! selectable properties once long-running and replication targets need them. 
+ +mod rules; +mod runtime; use std::ops::Bound; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; +use spacetimedb_sats::AlgebraicValue; use crate::{ client::SessionId, - core::StreamingProperties, schema::{SchemaPlan, SimRow}, workload::{ - commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary}, - table_ops::{ - ExpectedErrorKind, ExpectedModel, ExpectedResult, TableOperation, TableScenario, TableWorkloadInteraction, - TableWorkloadOutcome, - }, + commitlog_ops::DurableReplaySummary, + table_ops::{ExpectedErrorKind, TableWorkloadInteraction, TableWorkloadOutcome}, }, }; +pub(crate) use runtime::PropertyRuntime; + /// Target adapter for property evaluation. pub(crate) trait TargetPropertyAccess { fn schema_plan(&self) -> &SchemaPlan; @@ -65,33 +64,35 @@ pub(crate) trait TargetPropertyAccess { /// Canonical property IDs that can be selected by targets. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum PropertyKind { - /// Target execution must not panic. Enforced by the shared streaming runner. + /// Safety: target execution must not panic. + /// + /// Enforced by the shared streaming runner. NotCrash, - /// Inserted rows are visible to the inserting session. + /// Metamorphic: an inserted row is immediately visible to the inserting session. InsertSelect, - /// Deleted rows disappear from the deleting session's view. + /// Metamorphic: a deleted row disappears from the deleting session's view. DeleteSelect, - /// Optimized predicate counts agree with direct row projection. + /// Differential: optimized predicate counts agree with direct row projection. SelectSelectOptimizer, - /// Boolean partitions preserve total cardinality. + /// Metamorphic: boolean partitions preserve total cardinality. WhereTrueFalseNull, - /// Composite index range scans implement excluded upper bounds correctly. + /// Metamorphic: composite index range scans implement excluded upper bounds correctly. 
IndexRangeExcluded, - /// Banking scenario debit and credit shadow tables remain identical. + /// Safety: banking scenario debit and credit shadow tables remain identical. BankingTablesMatch, - /// Auto-increment IDs continue advancing after dynamic table migration. + /// Safety: auto-increment IDs continue advancing after dynamic table migration. DynamicMigrationAutoInc, - /// Durable replay state equals the expected committed model. + /// Safety: durable replay state equals the expected committed model. DurableReplayMatchesModel, - /// Expected-error interactions fail with the expected error class. + /// Safety: expected-error interactions fail with the expected error class. ExpectedErrorMatches, - /// Point lookups match the expected session-visible model. + /// Model/oracle: point lookups match the expected session-visible model. PointLookupMatchesModel, - /// Predicate counts match the expected session-visible model. + /// Model/oracle: predicate counts match the expected session-visible model. PredicateCountMatchesModel, - /// Range scans match the expected session-visible model. + /// Model/oracle: range scans match the expected session-visible model. RangeScanMatchesModel, - /// Full scans match the expected session-visible model. + /// Model/oracle: full scans match the expected session-visible model. 
FullScanMatchesModel, } @@ -158,23 +159,13 @@ pub(crate) enum CommitlogObservation { DurableReplay(DurableReplaySummary), } -#[derive(Clone, Debug)] -pub(crate) struct PropertyModels { - table: TableModel, -} - -#[derive(Clone, Debug)] -pub(crate) struct TableModel { - expected: ExpectedModel, -} - -pub(crate) struct PropertyContext<'a> { - pub access: &'a dyn TargetPropertyAccess, - pub models: &'a PropertyModels, +struct PropertyContext<'a> { + access: &'a dyn TargetPropertyAccess, + models: &'a runtime::PropertyModels, } #[derive(Clone, Debug)] -pub(crate) enum PropertyEvent<'a> { +enum PropertyEvent<'a> { TableInteractionApplied, RowInserted { conn: SessionId, @@ -223,986 +214,3 @@ pub(crate) enum PropertyEvent<'a> { DurableReplay(&'a DurableReplaySummary), TableWorkloadFinished(&'a TableWorkloadOutcome), } - -impl PropertyModels { - pub fn new(table_count: usize, num_connections: usize) -> Self { - Self { - table: TableModel { - expected: ExpectedModel::new(table_count, num_connections), - }, - } - } - - pub fn table(&self) -> &TableModel { - &self.table - } - - fn apply(&mut self, interaction: &TableWorkloadInteraction) { - self.table.expected.apply(interaction); - } -} - -impl TableModel { - pub fn committed_rows(&self) -> Vec> { - self.expected.clone().committed_rows() - } - - pub fn lookup_by_id(&self, conn: SessionId, table: usize, id: u64) -> Option { - self.expected.lookup_by_id(conn, table, id) - } - - pub fn predicate_count(&self, conn: SessionId, table: usize, col: u16, value: &AlgebraicValue) -> usize { - self.expected.predicate_count(conn, table, col, value) - } - - pub fn range_scan( - &self, - conn: SessionId, - table: usize, - cols: &[u16], - lower: &Bound, - upper: &Bound, - ) -> Vec { - self.expected.range_scan(conn, table, cols, lower, upper) - } - - pub fn full_scan(&self, conn: SessionId, table: usize) -> Vec { - let mut rows = self.expected.visible_rows(conn, table); - rows.sort_by_key(|row| row.id().unwrap_or_default()); - rows - } -} - 
-/// Mutable runtime holding selected property implementations. -pub(crate) struct PropertyRuntime { - rules: Vec, - models: PropertyModels, -} - -impl PropertyRuntime { - pub fn with_kinds(kinds: &[PropertyKind]) -> Self { - let mut rules: Vec = Vec::with_capacity(kinds.len()); - for kind in kinds { - match kind { - PropertyKind::NotCrash => rules.push(RuleEntry::new(*kind, Box::::default())), - PropertyKind::InsertSelect => rules.push(RuleEntry::new(*kind, Box::::default())), - PropertyKind::DeleteSelect => rules.push(RuleEntry::new(*kind, Box::::default())), - PropertyKind::SelectSelectOptimizer => rules.push(RuleEntry::new(*kind, Box::::default())), - PropertyKind::WhereTrueFalseNull => rules.push(RuleEntry::new(*kind, Box::::default())), - PropertyKind::IndexRangeExcluded => { - rules.push(RuleEntry::new(*kind, Box::::default())) - } - PropertyKind::BankingTablesMatch => { - rules.push(RuleEntry::new(*kind, Box::::default())) - } - PropertyKind::DynamicMigrationAutoInc => { - rules.push(RuleEntry::new(*kind, Box::::default())) - } - PropertyKind::DurableReplayMatchesModel => { - rules.push(RuleEntry::new(*kind, Box::::default())) - } - PropertyKind::ExpectedErrorMatches => { - rules.push(RuleEntry::new(*kind, Box::::default())) - } - PropertyKind::PointLookupMatchesModel => { - rules.push(RuleEntry::new(*kind, Box::::default())) - } - PropertyKind::PredicateCountMatchesModel => { - rules.push(RuleEntry::new(*kind, Box::::default())) - } - PropertyKind::RangeScanMatchesModel => { - rules.push(RuleEntry::new(*kind, Box::::default())) - } - PropertyKind::FullScanMatchesModel => { - rules.push(RuleEntry::new(*kind, Box::::default())) - } - } - } - Self { - rules, - models: PropertyModels::new(0, 0), - } - } - - pub fn for_table_workload(scenario: S, schema: SchemaPlan, num_connections: usize) -> Self - where - S: TableScenario + 'static, - { - let mut runtime = Self { - models: PropertyModels::new(schema.tables.len(), num_connections), - ..Self::default() - }; - 
runtime - .rules - .push(RuleEntry::non_periodic(Box::new(ExpectedTableStateRule::new( - scenario, schema, - )))); - runtime - } - - pub fn on_table_interaction( - &mut self, - access: &dyn TargetPropertyAccess, - interaction: &TableWorkloadInteraction, - ) -> Result<(), String> { - match &interaction.op { - TableOperation::BeginTx { .. } - | TableOperation::CommitTx { .. } - | TableOperation::RollbackTx { .. } - | TableOperation::BeginReadTx { .. } - | TableOperation::ReleaseReadTx { .. } => self.models.apply(interaction), - TableOperation::BatchInsert { .. } - | TableOperation::BatchDelete { .. } - | TableOperation::Reinsert { .. } - | TableOperation::AddColumn { .. } - | TableOperation::AddIndex { .. } => self.models.apply(interaction), - TableOperation::Insert { .. } - | TableOperation::Delete { .. } - | TableOperation::BeginTxConflict { .. } - | TableOperation::WriteConflictInsert { .. } - | TableOperation::ExactDuplicateInsert { .. } - | TableOperation::UniqueKeyConflictInsert { .. } - | TableOperation::DeleteMissing { .. } - | TableOperation::PointLookup { .. } - | TableOperation::PredicateCount { .. } - | TableOperation::RangeScan { .. } - | TableOperation::FullScan { .. 
} => {} - } - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe(&ctx, PropertyEvent::TableInteractionApplied)?; - } - Ok(()) - } - - pub fn on_insert( - &mut self, - access: &dyn TargetPropertyAccess, - _step: u64, - conn: SessionId, - table: usize, - row: &SimRow, - in_tx: bool, - ) -> Result<(), String> { - self.models - .apply(&TableWorkloadInteraction::insert(conn, table, row.clone())); - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::RowInserted { - conn, - table, - row, - in_tx, - }, - )?; - } - Ok(()) - } - - pub fn on_delete( - &mut self, - access: &dyn TargetPropertyAccess, - _step: u64, - conn: SessionId, - table: usize, - row: &SimRow, - in_tx: bool, - ) -> Result<(), String> { - self.models - .apply(&TableWorkloadInteraction::delete(conn, table, row.clone())); - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::RowDeleted { - conn, - table, - row, - in_tx, - }, - )?; - } - Ok(()) - } - - pub fn on_expected_error( - &mut self, - access: &dyn TargetPropertyAccess, - kind: ExpectedErrorKind, - interaction: &TableWorkloadInteraction, - ) -> Result<(), String> { - if interaction.expected != ExpectedResult::Err(kind) { - return Err(format!( - "[ExpectedErrorMatches] expected {:?}, observed {kind:?} for {interaction:?}", - interaction.expected - )); - } - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry - .rule - .observe(&ctx, PropertyEvent::ExpectedError { kind, interaction })?; - } - Ok(()) - } - - pub fn on_point_lookup( - &mut self, - access: &dyn TargetPropertyAccess, - conn: SessionId, - table: usize, - id: u64, - actual: &Option, - ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for 
entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::PointLookup { - conn, - table, - id, - actual, - }, - )?; - } - Ok(()) - } - - pub fn on_predicate_count( - &mut self, - access: &dyn TargetPropertyAccess, - conn: SessionId, - table: usize, - col: u16, - value: &AlgebraicValue, - actual: usize, - ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::PredicateCount { - conn, - table, - col, - value, - actual, - }, - )?; - } - Ok(()) - } - - #[allow(clippy::too_many_arguments)] - pub fn on_range_scan( - &mut self, - access: &dyn TargetPropertyAccess, - conn: SessionId, - table: usize, - cols: &[u16], - lower: &Bound, - upper: &Bound, - actual: &[SimRow], - ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::RangeScan { - conn, - table, - cols, - lower, - upper, - actual, - }, - )?; - } - Ok(()) - } - - pub fn on_full_scan( - &mut self, - access: &dyn TargetPropertyAccess, - conn: SessionId, - table: usize, - actual: &[SimRow], - ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry - .rule - .observe(&ctx, PropertyEvent::FullScan { conn, table, actual })?; - } - Ok(()) - } - - pub fn on_commit_or_rollback(&mut self, access: &dyn TargetPropertyAccess) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe(&ctx, PropertyEvent::CommitOrRollback)?; - } - Ok(()) - } - - pub fn on_dynamic_migration_probe( - &mut self, - access: &dyn TargetPropertyAccess, - probe: &DynamicMigrationProbe, - ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - 
entry.rule.observe(&ctx, PropertyEvent::DynamicMigrationProbe(probe))?; - } - Ok(()) - } - - pub fn on_durable_replay( - &mut self, - access: &dyn TargetPropertyAccess, - replay: &DurableReplaySummary, - ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe(&ctx, PropertyEvent::DurableReplay(replay))?; - } - Ok(()) - } - - pub fn on_table_workload_finish( - &mut self, - access: &dyn TargetPropertyAccess, - outcome: &TableWorkloadOutcome, - ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry - .rule - .observe(&ctx, PropertyEvent::TableWorkloadFinished(outcome))?; - } - Ok(()) - } - - fn observe_table_observation( - &mut self, - access: &dyn TargetPropertyAccess, - interaction: &TableWorkloadInteraction, - observation: &TableObservation, - ) -> Result<(), String> { - match observation { - TableObservation::Applied => {} - TableObservation::RowInserted { - conn, - table, - row, - in_tx, - } => self.on_insert(access, 0, *conn, *table, row, *in_tx)?, - TableObservation::RowDeleted { - conn, - table, - row, - in_tx, - } => self.on_delete(access, 0, *conn, *table, row, *in_tx)?, - TableObservation::ExpectedError(kind) => self.on_expected_error(access, *kind, interaction)?, - TableObservation::PointLookup { - conn, - table, - id, - actual, - } => self.on_point_lookup(access, *conn, *table, *id, actual)?, - TableObservation::PredicateCount { - conn, - table, - col, - value, - actual, - } => self.on_predicate_count(access, *conn, *table, *col, value, *actual)?, - TableObservation::RangeScan { - conn, - table, - cols, - lower, - upper, - actual, - } => self.on_range_scan(access, *conn, *table, cols, lower, upper, actual)?, - TableObservation::FullScan { conn, table, actual } => self.on_full_scan(access, *conn, *table, actual)?, - TableObservation::CommitOrRollback => {} - } - - 
self.on_table_interaction(access, interaction)?; - - if matches!(observation, TableObservation::CommitOrRollback) { - self.on_commit_or_rollback(access)?; - } - Ok(()) - } -} - -impl StreamingProperties for PropertyRuntime -where - E: crate::core::TargetEngine< - CommitlogInteraction, - Observation = CommitlogObservation, - Outcome = CommitlogWorkloadOutcome, - Error = String, - > + TargetPropertyAccess, -{ - fn observe( - &mut self, - engine: &E, - interaction: &CommitlogInteraction, - observation: &CommitlogObservation, - ) -> Result<(), String> { - match (interaction, observation) { - (CommitlogInteraction::Table(table_interaction), CommitlogObservation::Table(table_observation)) => { - self.observe_table_observation(engine, table_interaction, table_observation) - } - (_, CommitlogObservation::DynamicMigrationProbe(probe)) => self.on_dynamic_migration_probe(engine, probe), - (_, CommitlogObservation::DurableReplay(replay)) => self.on_durable_replay(engine, replay), - (_, CommitlogObservation::Applied | CommitlogObservation::Skipped) => Ok(()), - (other, observation) => Err(format!( - "observation {observation:?} does not match interaction {other:?}" - )), - } - } - - fn finish(&mut self, engine: &E, outcome: &CommitlogWorkloadOutcome) -> Result<(), String> { - self.on_durable_replay(engine, &outcome.replay)?; - self.on_table_workload_finish(engine, &outcome.table) - } -} - -struct RuleEntry { - rule: Box, -} - -impl RuleEntry { - fn new(kind: PropertyKind, rule: Box) -> Self { - let _ = kind; - Self { rule } - } - - fn non_periodic(rule: Box) -> Self { - Self { rule } - } -} - -impl Default for PropertyRuntime { - fn default() -> Self { - Self::with_kinds(&[ - PropertyKind::NotCrash, - PropertyKind::InsertSelect, - PropertyKind::DeleteSelect, - PropertyKind::SelectSelectOptimizer, - PropertyKind::WhereTrueFalseNull, - PropertyKind::IndexRangeExcluded, - PropertyKind::BankingTablesMatch, - PropertyKind::DynamicMigrationAutoInc, - 
PropertyKind::DurableReplayMatchesModel, - PropertyKind::ExpectedErrorMatches, - PropertyKind::PointLookupMatchesModel, - PropertyKind::PredicateCountMatchesModel, - PropertyKind::RangeScanMatchesModel, - PropertyKind::FullScanMatchesModel, - ]) - } -} - -trait PropertyRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let _ = ctx; - let _ = event; - Ok(()) - } -} - -#[derive(Default)] -struct NotCrashRule; - -impl PropertyRule for NotCrashRule {} - -struct ExpectedTableStateRule { - scenario: S, - schema: SchemaPlan, -} - -impl ExpectedTableStateRule { - fn new(scenario: S, schema: SchemaPlan) -> Self { - Self { scenario, schema } - } -} - -impl PropertyRule for ExpectedTableStateRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - match event { - PropertyEvent::TableWorkloadFinished(outcome) => { - let expected_rows = ctx.models.table().committed_rows(); - if outcome.final_rows != expected_rows { - return Err(format!( - "[ExpectedTableState] final table state mismatch: expected={expected_rows:?} actual={:?}", - outcome.final_rows - )); - } - self.scenario - .validate_outcome(&self.schema, outcome) - .map_err(|err| format!("[ExpectedTableState] scenario invariant failed: {err}")) - } - _ => Ok(()), - } - } -} - -#[derive(Default)] -struct InsertSelectRule; - -impl PropertyRule for InsertSelectRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::RowInserted { conn, table, row, .. 
} = event else { - return Ok(()); - }; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - let found = ctx.access.lookup_in_connection(conn, table, id)?; - if found != Some(row.clone()) { - return Err(format!( - "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={row:?}, actual={found:?}" - )); - } - Ok(()) - } -} - -#[derive(Default)] -struct DeleteSelectRule; - -impl PropertyRule for DeleteSelectRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::RowDeleted { conn, table, row, .. } = event else { - return Ok(()); - }; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; - if ctx.access.lookup_in_connection(conn, table, id)?.is_some() { - return Err(format!( - "[DeleteSelect] row still visible after delete on conn={conn}, table={table}, row={row:?}" - )); - } - Ok(()) - } -} - -fn post_write_check_tables(ctx: &PropertyContext<'_>, event: &PropertyEvent<'_>) -> Option> { - match event { - PropertyEvent::RowInserted { - table, in_tx: false, .. - } - | PropertyEvent::RowDeleted { - table, in_tx: false, .. 
- } => Some(vec![*table]), - PropertyEvent::CommitOrRollback => Some((0..ctx.access.schema_plan().tables.len()).collect()), - _ => None, - } -} - -#[derive(Default)] -struct NoRecRule; - -impl PropertyRule for NoRecRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let Some(tables) = post_write_check_tables(ctx, &event) else { - return Ok(()); - }; - for table in tables { - let table_plan = ctx - .access - .schema_plan() - .tables - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - let Some((col_idx, col_ty)) = table_plan - .columns - .iter() - .enumerate() - .skip(1) - .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool | AlgebraicType::U64)) - .map(|(idx, col)| (idx as u16, &col.ty)) - else { - continue; - }; - let scanned_rows = ctx.access.collect_rows_for_table(table)?; - if scanned_rows.is_empty() { - continue; - } - let predicate_value = match col_ty { - AlgebraicType::Bool => AlgebraicValue::Bool(true), - AlgebraicType::U64 => scanned_rows[0].values[col_idx as usize].clone(), - _ => continue, - }; - let where_count = ctx.access.count_by_col_eq(table, col_idx, &predicate_value)?; - let projected_true_count = scanned_rows - .iter() - .filter(|row| row.values[col_idx as usize] == predicate_value) - .count(); - if where_count != projected_true_count { - return Err(format!( - "[NoREC::SelectSelectOptimizer] mismatch on table={table}, col={col_idx}: where_count={where_count}, projected_true={projected_true_count}" - )); - } - } - Ok(()) - } -} - -#[derive(Default)] -struct TlpRule; - -impl PropertyRule for TlpRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let Some(tables) = post_write_check_tables(ctx, &event) else { - return Ok(()); - }; - for table in tables { - let table_plan = ctx - .access - .schema_plan() - .tables - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - let Some(col_idx) = 
table_plan - .columns - .iter() - .enumerate() - .skip(1) - .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool)) - .map(|(idx, _)| idx as u16) - else { - continue; - }; - let total = ctx.access.count_rows(table)?; - let true_count = ctx - .access - .count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(true))?; - let false_count = ctx - .access - .count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(false))?; - let partition_sum = true_count + false_count; - if partition_sum != total { - return Err(format!( - "[TLP::WhereTrueFalseNull|TLP::UNIONAllPreservesCardinality] partition mismatch on table={table}, col={col_idx}: true={true_count}, false={false_count}, total={total}" - )); - } - } - Ok(()) - } -} - -#[derive(Default)] -struct IndexRangeExcludedRule; - -impl PropertyRule for IndexRangeExcludedRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let Some(tables) = post_write_check_tables(ctx, &event) else { - return Ok(()); - }; - const MAX_ROWS_FOR_INDEX_SCAN_CHECK: usize = 512; - - for table in tables { - let table_plan = ctx - .access - .schema_plan() - .tables - .get(table) - .ok_or_else(|| format!("table {table} out of range"))?; - let rows = ctx.access.collect_rows_for_table(table)?; - if rows.len() < 2 || rows.len() > MAX_ROWS_FOR_INDEX_SCAN_CHECK { - continue; - } - - for cols in table_plan.extra_indexes.iter().filter(|cols| cols.len() > 1) { - if !cols.iter().all(|&col| { - matches!( - table_plan.columns[col as usize].ty, - AlgebraicType::U64 | AlgebraicType::Bool - ) - }) { - continue; - } - - let mut sorted_rows = rows.clone(); - sorted_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - - let lower_key = sorted_rows[0].project_key(cols).to_algebraic_value(); - let upper_key = sorted_rows[sorted_rows.len() - 1] - .project_key(cols) - .to_algebraic_value(); - let lower = Bound::Included(lower_key.clone()); - let upper = Bound::Excluded(upper_key.clone()); - - let mut 
expected_rows = sorted_rows - .into_iter() - .filter(|row| { - let key = row.project_key(cols).to_algebraic_value(); - key >= lower_key && key < upper_key - }) - .collect::>(); - expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - - let mut actual_rows = ctx.access.range_scan(table, cols, lower, upper)?; - actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); - - if actual_rows != expected_rows { - return Err(format!( - "[PQS::IndexRangeExcluded] range mismatch on table={table}, cols={cols:?}: expected={expected_rows:?}, actual={actual_rows:?}" - )); - } - } - } - - Ok(()) - } -} - -#[derive(Default)] -struct BankingMatchRule; - -impl PropertyRule for BankingMatchRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - match event { - PropertyEvent::RowInserted { in_tx: false, .. } - | PropertyEvent::RowDeleted { in_tx: false, .. } - | PropertyEvent::CommitOrRollback => check_banking_tables_match(ctx.access), - _ => Ok(()), - } - } -} - -#[derive(Default)] -struct DynamicMigrationAutoIncRule; - -impl PropertyRule for DynamicMigrationAutoIncRule { - fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::DynamicMigrationProbe(probe) = event else { - return Ok(()); - }; - let max_existing_id = probe - .existing_rows - .iter() - .filter_map(sim_row_integer_id) - .max() - .unwrap_or(0); - let inserted_id = sim_row_integer_id(&probe.inserted_row).ok_or_else(|| { - format!( - "[DynamicMigrationAutoInc] probe row missing integer id for slot={}, from_version={}, to_version={}: {:?}", - probe.slot, probe.from_version, probe.to_version, probe.inserted_row - ) - })?; - if inserted_id <= max_existing_id { - return Err(format!( - "[DynamicMigrationAutoInc] non-advancing id for slot={}, from_version={}, to_version={}: inserted_id={}, max_existing_id={}", - probe.slot, probe.from_version, probe.to_version, inserted_id, 
max_existing_id - )); - } - Ok(()) - } -} - -#[derive(Default)] -struct DurableReplayMatchesModelRule; - -impl PropertyRule for DurableReplayMatchesModelRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::DurableReplay(replay) = event else { - return Ok(()); - }; - let expected_rows = ctx.models.table().committed_rows(); - if replay.base_rows != expected_rows { - return Err(format!( - "[DurableReplayMatchesModel] replayed durable state mismatch at offset {:?}: expected={expected_rows:?} actual={:?}", - replay.durable_offset, replay.base_rows - )); - } - Ok(()) - } -} - -#[derive(Default)] -struct ExpectedErrorMatchesRule; - -impl PropertyRule for ExpectedErrorMatchesRule { - fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::ExpectedError { kind, interaction } = event else { - return Ok(()); - }; - if interaction.expected == ExpectedResult::Err(kind) { - Ok(()) - } else { - Err(format!( - "[ExpectedErrorMatches] observed {kind:?}, but interaction expected {:?}: {interaction:?}", - interaction.expected - )) - } - } -} - -#[derive(Default)] -struct PointLookupMatchesModelRule; - -impl PropertyRule for PointLookupMatchesModelRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::PointLookup { - conn, - table, - id, - actual, - } = event - else { - return Ok(()); - }; - let expected = ctx.models.table().lookup_by_id(conn, table, id); - if *actual != expected { - return Err(format!( - "[Model::PointLookup] mismatch conn={conn}, table={table}, id={id}: expected={expected:?}, actual={actual:?}" - )); - } - Ok(()) - } -} - -#[derive(Default)] -struct PredicateCountMatchesModelRule; - -impl PropertyRule for PredicateCountMatchesModelRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let 
PropertyEvent::PredicateCount { - conn, - table, - col, - value, - actual, - } = event - else { - return Ok(()); - }; - let expected = ctx.models.table().predicate_count(conn, table, col, value); - if actual != expected { - return Err(format!( - "[Model::PredicateCount] mismatch conn={conn}, table={table}, col={col}, value={value:?}: expected={expected}, actual={actual}" - )); - } - Ok(()) - } -} - -#[derive(Default)] -struct RangeScanMatchesModelRule; - -impl PropertyRule for RangeScanMatchesModelRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::RangeScan { - conn, - table, - cols, - lower, - upper, - actual, - } = event - else { - return Ok(()); - }; - let expected = ctx.models.table().range_scan(conn, table, cols, lower, upper); - if actual != expected.as_slice() { - return Err(format!( - "[Model::RangeScan] mismatch conn={conn}, table={table}, cols={cols:?}, lower={lower:?}, upper={upper:?}: expected={expected:?}, actual={actual:?}" - )); - } - Ok(()) - } -} - -#[derive(Default)] -struct FullScanMatchesModelRule; - -impl PropertyRule for FullScanMatchesModelRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::FullScan { conn, table, actual } = event else { - return Ok(()); - }; - let expected = ctx.models.table().full_scan(conn, table); - if actual != expected.as_slice() { - return Err(format!( - "[Model::FullScan] mismatch conn={conn}, table={table}: expected={expected:?}, actual={actual:?}" - )); - } - Ok(()) - } -} - -fn check_banking_tables_match(access: &dyn TargetPropertyAccess) -> Result<(), String> { - let schema = access.schema_plan(); - let debit = schema.tables.iter().position(|table| table.name == "debit_accounts"); - let credit = schema.tables.iter().position(|table| table.name == "credit_accounts"); - let (Some(left), Some(right)) = (debit, credit) else { - return Ok(()); - }; - - let left_rows 
= access.collect_rows_for_table(left)?; - let right_rows = access.collect_rows_for_table(right)?; - if left_rows != right_rows { - return Err(format!( - "[Shadow::AllTableHaveExpectedContent] banking mismatch: debit={left_rows:?}, credit={right_rows:?}" - )); - } - Ok(()) -} - -fn compare_rows_by_cols(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { - lhs.project_key(cols) - .to_algebraic_value() - .cmp(&rhs.project_key(cols).to_algebraic_value()) - .then_with(|| lhs.values.cmp(&rhs.values)) -} - -fn sim_row_integer_id(row: &SimRow) -> Option { - match row.values.first() { - Some(AlgebraicValue::I64(value)) => Some(*value as i128), - Some(AlgebraicValue::U64(value)) => Some(*value as i128), - _ => None, - } -} diff --git a/crates/dst/src/properties/rules.rs b/crates/dst/src/properties/rules.rs new file mode 100644 index 00000000000..9e5a13f9fdd --- /dev/null +++ b/crates/dst/src/properties/rules.rs @@ -0,0 +1,496 @@ +use std::ops::Bound; + +use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; + +use crate::{ + schema::{SchemaPlan, SimRow}, + workload::table_ops::{ExpectedResult, TableScenario}, +}; + +use super::{PropertyContext, PropertyEvent, PropertyKind, TargetPropertyAccess}; + +pub(super) trait PropertyRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let _ = ctx; + let _ = event; + Ok(()) + } +} + +pub(super) fn rule_for_kind(kind: PropertyKind) -> Box { + match kind { + PropertyKind::NotCrash => Box::::default(), + PropertyKind::InsertSelect => Box::::default(), + PropertyKind::DeleteSelect => Box::::default(), + PropertyKind::SelectSelectOptimizer => Box::::default(), + PropertyKind::WhereTrueFalseNull => Box::::default(), + PropertyKind::IndexRangeExcluded => Box::::default(), + PropertyKind::BankingTablesMatch => Box::::default(), + PropertyKind::DynamicMigrationAutoInc => Box::::default(), + PropertyKind::DurableReplayMatchesModel => Box::::default(), + 
PropertyKind::ExpectedErrorMatches => Box::::default(), + PropertyKind::PointLookupMatchesModel => Box::::default(), + PropertyKind::PredicateCountMatchesModel => Box::::default(), + PropertyKind::RangeScanMatchesModel => Box::::default(), + PropertyKind::FullScanMatchesModel => Box::::default(), + } +} + +pub(super) fn expected_table_state_rule(scenario: S, schema: SchemaPlan) -> Box +where + S: TableScenario + 'static, +{ + Box::new(ExpectedTableStateRule::new(scenario, schema)) +} + +#[derive(Default)] +struct NotCrashRule; + +impl PropertyRule for NotCrashRule {} + +struct ExpectedTableStateRule { + scenario: S, + schema: SchemaPlan, +} + +impl ExpectedTableStateRule { + fn new(scenario: S, schema: SchemaPlan) -> Self { + Self { scenario, schema } + } +} + +impl PropertyRule for ExpectedTableStateRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + match event { + PropertyEvent::TableWorkloadFinished(outcome) => { + let expected_rows = ctx.models.table().committed_rows(); + if outcome.final_rows != expected_rows { + return Err(format!( + "[ExpectedTableState] final table state mismatch: expected={expected_rows:?} actual={:?}", + outcome.final_rows + )); + } + self.scenario + .validate_outcome(&self.schema, outcome) + .map_err(|err| format!("[ExpectedTableState] scenario invariant failed: {err}")) + } + _ => Ok(()), + } + } +} + +#[derive(Default)] +struct InsertSelectRule; + +impl PropertyRule for InsertSelectRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RowInserted { conn, table, row, .. 
} = event else { + return Ok(()); + }; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let found = ctx.access.lookup_in_connection(conn, table, id)?; + if found != Some(row.clone()) { + return Err(format!( + "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={row:?}, actual={found:?}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct DeleteSelectRule; + +impl PropertyRule for DeleteSelectRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RowDeleted { conn, table, row, .. } = event else { + return Ok(()); + }; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + if ctx.access.lookup_in_connection(conn, table, id)?.is_some() { + return Err(format!( + "[DeleteSelect] row still visible after delete on conn={conn}, table={table}, row={row:?}" + )); + } + Ok(()) + } +} + +fn post_write_check_tables(ctx: &PropertyContext<'_>, event: &PropertyEvent<'_>) -> Option> { + match event { + PropertyEvent::RowInserted { + table, in_tx: false, .. + } + | PropertyEvent::RowDeleted { + table, in_tx: false, .. 
+ } => Some(vec![*table]), + PropertyEvent::CommitOrRollback => Some((0..ctx.access.schema_plan().tables.len()).collect()), + _ => None, + } +} + +#[derive(Default)] +struct NoRecRule; + +impl PropertyRule for NoRecRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { + return Ok(()); + }; + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some((col_idx, col_ty)) = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool | AlgebraicType::U64)) + .map(|(idx, col)| (idx as u16, &col.ty)) + else { + continue; + }; + let scanned_rows = ctx.access.collect_rows_for_table(table)?; + if scanned_rows.is_empty() { + continue; + } + let predicate_value = match col_ty { + AlgebraicType::Bool => AlgebraicValue::Bool(true), + AlgebraicType::U64 => scanned_rows[0].values[col_idx as usize].clone(), + _ => continue, + }; + let where_count = ctx.access.count_by_col_eq(table, col_idx, &predicate_value)?; + let projected_true_count = scanned_rows + .iter() + .filter(|row| row.values[col_idx as usize] == predicate_value) + .count(); + if where_count != projected_true_count { + return Err(format!( + "[NoREC::SelectSelectOptimizer] mismatch on table={table}, col={col_idx}: where_count={where_count}, projected_true={projected_true_count}" + )); + } + } + Ok(()) + } +} + +#[derive(Default)] +struct TlpRule; + +impl PropertyRule for TlpRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { + return Ok(()); + }; + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some(col_idx) = 
table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool)) + .map(|(idx, _)| idx as u16) + else { + continue; + }; + let total = ctx.access.count_rows(table)?; + let true_count = ctx + .access + .count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(true))?; + let false_count = ctx + .access + .count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(false))?; + let partition_sum = true_count + false_count; + if partition_sum != total { + return Err(format!( + "[TLP::WhereTrueFalseNull|TLP::UNIONAllPreservesCardinality] partition mismatch on table={table}, col={col_idx}: true={true_count}, false={false_count}, total={total}" + )); + } + } + Ok(()) + } +} + +#[derive(Default)] +struct IndexRangeExcludedRule; + +impl PropertyRule for IndexRangeExcludedRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { + return Ok(()); + }; + const MAX_ROWS_FOR_INDEX_SCAN_CHECK: usize = 512; + + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let rows = ctx.access.collect_rows_for_table(table)?; + if rows.len() < 2 || rows.len() > MAX_ROWS_FOR_INDEX_SCAN_CHECK { + continue; + } + + for cols in table_plan.extra_indexes.iter().filter(|cols| cols.len() > 1) { + if !cols.iter().all(|&col| { + matches!( + table_plan.columns[col as usize].ty, + AlgebraicType::U64 | AlgebraicType::Bool + ) + }) { + continue; + } + + let mut sorted_rows = rows.clone(); + sorted_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let lower_key = sorted_rows[0].project_key(cols).to_algebraic_value(); + let upper_key = sorted_rows[sorted_rows.len() - 1] + .project_key(cols) + .to_algebraic_value(); + let lower = Bound::Included(lower_key.clone()); + let upper = Bound::Excluded(upper_key.clone()); + + let mut 
expected_rows = sorted_rows + .into_iter() + .filter(|row| { + let key = row.project_key(cols).to_algebraic_value(); + key >= lower_key && key < upper_key + }) + .collect::>(); + expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let mut actual_rows = ctx.access.range_scan(table, cols, lower, upper)?; + actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + if actual_rows != expected_rows { + return Err(format!( + "[PQS::IndexRangeExcluded] range mismatch on table={table}, cols={cols:?}: expected={expected_rows:?}, actual={actual_rows:?}" + )); + } + } + } + + Ok(()) + } +} + +#[derive(Default)] +struct BankingMatchRule; + +impl PropertyRule for BankingMatchRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + match event { + PropertyEvent::RowInserted { in_tx: false, .. } + | PropertyEvent::RowDeleted { in_tx: false, .. } + | PropertyEvent::CommitOrRollback => check_banking_tables_match(ctx.access), + _ => Ok(()), + } + } +} + +#[derive(Default)] +struct DynamicMigrationAutoIncRule; + +impl PropertyRule for DynamicMigrationAutoIncRule { + fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::DynamicMigrationProbe(probe) = event else { + return Ok(()); + }; + let max_existing_id = probe + .existing_rows + .iter() + .filter_map(sim_row_integer_id) + .max() + .unwrap_or(0); + let inserted_id = sim_row_integer_id(&probe.inserted_row).ok_or_else(|| { + format!( + "[DynamicMigrationAutoInc] probe row missing integer id for slot={}, from_version={}, to_version={}: {:?}", + probe.slot, probe.from_version, probe.to_version, probe.inserted_row + ) + })?; + if inserted_id <= max_existing_id { + return Err(format!( + "[DynamicMigrationAutoInc] non-advancing id for slot={}, from_version={}, to_version={}: inserted_id={}, max_existing_id={}", + probe.slot, probe.from_version, probe.to_version, inserted_id, 
max_existing_id + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct DurableReplayMatchesModelRule; + +impl PropertyRule for DurableReplayMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::DurableReplay(replay) = event else { + return Ok(()); + }; + let expected_rows = ctx.models.table().committed_rows(); + if replay.base_rows != expected_rows { + return Err(format!( + "[DurableReplayMatchesModel] replayed durable state mismatch at offset {:?}: expected={expected_rows:?} actual={:?}", + replay.durable_offset, replay.base_rows + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct ExpectedErrorMatchesRule; + +impl PropertyRule for ExpectedErrorMatchesRule { + fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::ExpectedError { kind, interaction } = event else { + return Ok(()); + }; + if interaction.expected == ExpectedResult::Err(kind) { + Ok(()) + } else { + Err(format!( + "[ExpectedErrorMatches] observed {kind:?}, but interaction expected {:?}: {interaction:?}", + interaction.expected + )) + } + } +} + +#[derive(Default)] +struct PointLookupMatchesModelRule; + +impl PropertyRule for PointLookupMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::PointLookup { + conn, + table, + id, + actual, + } = event + else { + return Ok(()); + }; + let expected = ctx.models.table().lookup_by_id(conn, table, id); + if *actual != expected { + return Err(format!( + "[Model::PointLookup] mismatch conn={conn}, table={table}, id={id}: expected={expected:?}, actual={actual:?}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct PredicateCountMatchesModelRule; + +impl PropertyRule for PredicateCountMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let 
PropertyEvent::PredicateCount { + conn, + table, + col, + value, + actual, + } = event + else { + return Ok(()); + }; + let expected = ctx.models.table().predicate_count(conn, table, col, value); + if actual != expected { + return Err(format!( + "[Model::PredicateCount] mismatch conn={conn}, table={table}, col={col}, value={value:?}: expected={expected}, actual={actual}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct RangeScanMatchesModelRule; + +impl PropertyRule for RangeScanMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + } = event + else { + return Ok(()); + }; + let expected = ctx.models.table().range_scan(conn, table, cols, lower, upper); + if actual != expected.as_slice() { + return Err(format!( + "[Model::RangeScan] mismatch conn={conn}, table={table}, cols={cols:?}, lower={lower:?}, upper={upper:?}: expected={expected:?}, actual={actual:?}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct FullScanMatchesModelRule; + +impl PropertyRule for FullScanMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::FullScan { conn, table, actual } = event else { + return Ok(()); + }; + let expected = ctx.models.table().full_scan(conn, table); + if actual != expected.as_slice() { + return Err(format!( + "[Model::FullScan] mismatch conn={conn}, table={table}: expected={expected:?}, actual={actual:?}" + )); + } + Ok(()) + } +} + +fn check_banking_tables_match(access: &dyn TargetPropertyAccess) -> Result<(), String> { + let schema = access.schema_plan(); + let debit = schema.tables.iter().position(|table| table.name == "debit_accounts"); + let credit = schema.tables.iter().position(|table| table.name == "credit_accounts"); + let (Some(left), Some(right)) = (debit, credit) else { + return Ok(()); + }; + + let left_rows 
= access.collect_rows_for_table(left)?; + let right_rows = access.collect_rows_for_table(right)?; + if left_rows != right_rows { + return Err(format!( + "[Shadow::AllTableHaveExpectedContent] banking mismatch: debit={left_rows:?}, credit={right_rows:?}" + )); + } + Ok(()) +} + +fn compare_rows_by_cols(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) +} + +fn sim_row_integer_id(row: &SimRow) -> Option { + match row.values.first() { + Some(AlgebraicValue::I64(value)) => Some(*value as i128), + Some(AlgebraicValue::U64(value)) => Some(*value as i128), + _ => None, + } +} diff --git a/crates/dst/src/properties/runtime.rs b/crates/dst/src/properties/runtime.rs new file mode 100644 index 00000000000..e7224b4df5c --- /dev/null +++ b/crates/dst/src/properties/runtime.rs @@ -0,0 +1,512 @@ +use std::ops::Bound; + +use spacetimedb_sats::AlgebraicValue; + +use crate::{ + client::SessionId, + core::{StreamingProperties, TargetEngine}, + schema::{SchemaPlan, SimRow}, + workload::{ + commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary}, + table_ops::{ + ExpectedErrorKind, ExpectedModel, ExpectedResult, TableOperation, TableScenario, TableWorkloadInteraction, + TableWorkloadOutcome, + }, + }, +}; + +use super::{ + rules::{expected_table_state_rule, rule_for_kind, PropertyRule}, + CommitlogObservation, DynamicMigrationProbe, PropertyContext, PropertyEvent, PropertyKind, TableObservation, + TargetPropertyAccess, +}; + +#[derive(Clone, Debug)] +pub(super) struct PropertyModels { + table: TableModel, +} + +#[derive(Clone, Debug)] +pub(super) struct TableModel { + expected: ExpectedModel, +} + +impl PropertyModels { + pub(super) fn new(table_count: usize, num_connections: usize) -> Self { + Self { + table: TableModel { + expected: ExpectedModel::new(table_count, num_connections), + }, + } + } + 
+ pub(super) fn table(&self) -> &TableModel { + &self.table + } + + fn apply(&mut self, interaction: &TableWorkloadInteraction) { + self.table.expected.apply(interaction); + } +} + +impl TableModel { + pub(super) fn committed_rows(&self) -> Vec> { + self.expected.clone().committed_rows() + } + + pub(super) fn lookup_by_id(&self, conn: SessionId, table: usize, id: u64) -> Option { + self.expected.lookup_by_id(conn, table, id) + } + + pub(super) fn predicate_count(&self, conn: SessionId, table: usize, col: u16, value: &AlgebraicValue) -> usize { + self.expected.predicate_count(conn, table, col, value) + } + + pub(super) fn range_scan( + &self, + conn: SessionId, + table: usize, + cols: &[u16], + lower: &Bound, + upper: &Bound, + ) -> Vec { + self.expected.range_scan(conn, table, cols, lower, upper) + } + + pub(super) fn full_scan(&self, conn: SessionId, table: usize) -> Vec { + let mut rows = self.expected.visible_rows(conn, table); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + rows + } +} + +/// Mutable runtime holding selected property implementations. +pub(crate) struct PropertyRuntime { + rules: Vec, + models: PropertyModels, +} + +impl PropertyRuntime { + pub fn with_kinds(kinds: &[PropertyKind]) -> Self { + let rules = kinds.iter().copied().map(rule_for_kind).map(RuleEntry::new).collect(); + Self { + rules, + models: PropertyModels::new(0, 0), + } + } + + pub fn for_table_workload(scenario: S, schema: SchemaPlan, num_connections: usize) -> Self + where + S: TableScenario + 'static, + { + let mut runtime = Self { + models: PropertyModels::new(schema.tables.len(), num_connections), + ..Self::default() + }; + runtime + .rules + .push(RuleEntry::new(expected_table_state_rule(scenario, schema))); + runtime + } + + pub fn on_table_interaction( + &mut self, + access: &dyn TargetPropertyAccess, + interaction: &TableWorkloadInteraction, + ) -> Result<(), String> { + match &interaction.op { + TableOperation::BeginTx { .. } + | TableOperation::CommitTx { .. 
} + | TableOperation::RollbackTx { .. } + | TableOperation::BeginReadTx { .. } + | TableOperation::ReleaseReadTx { .. } => self.models.apply(interaction), + TableOperation::BatchInsert { .. } + | TableOperation::BatchDelete { .. } + | TableOperation::Reinsert { .. } + | TableOperation::AddColumn { .. } + | TableOperation::AddIndex { .. } => self.models.apply(interaction), + TableOperation::Insert { .. } + | TableOperation::Delete { .. } + | TableOperation::BeginTxConflict { .. } + | TableOperation::WriteConflictInsert { .. } + | TableOperation::ExactDuplicateInsert { .. } + | TableOperation::UniqueKeyConflictInsert { .. } + | TableOperation::DeleteMissing { .. } + | TableOperation::PointLookup { .. } + | TableOperation::PredicateCount { .. } + | TableOperation::RangeScan { .. } + | TableOperation::FullScan { .. } => {} + } + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe(&ctx, PropertyEvent::TableInteractionApplied)?; + } + Ok(()) + } + + pub fn on_insert( + &mut self, + access: &dyn TargetPropertyAccess, + _step: u64, + conn: SessionId, + table: usize, + row: &SimRow, + in_tx: bool, + ) -> Result<(), String> { + self.models + .apply(&TableWorkloadInteraction::insert(conn, table, row.clone())); + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe( + &ctx, + PropertyEvent::RowInserted { + conn, + table, + row, + in_tx, + }, + )?; + } + Ok(()) + } + + pub fn on_delete( + &mut self, + access: &dyn TargetPropertyAccess, + _step: u64, + conn: SessionId, + table: usize, + row: &SimRow, + in_tx: bool, + ) -> Result<(), String> { + self.models + .apply(&TableWorkloadInteraction::delete(conn, table, row.clone())); + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe( + &ctx, + PropertyEvent::RowDeleted { + conn, + table, + row, + in_tx, + }, + )?; + } + Ok(()) + } 
+ + pub fn on_expected_error( + &mut self, + access: &dyn TargetPropertyAccess, + kind: ExpectedErrorKind, + interaction: &TableWorkloadInteraction, + ) -> Result<(), String> { + if interaction.expected != ExpectedResult::Err(kind) { + return Err(format!( + "[ExpectedErrorMatches] expected {:?}, observed {kind:?} for {interaction:?}", + interaction.expected + )); + } + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry + .rule + .observe(&ctx, PropertyEvent::ExpectedError { kind, interaction })?; + } + Ok(()) + } + + pub fn on_point_lookup( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + table: usize, + id: u64, + actual: &Option, + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe( + &ctx, + PropertyEvent::PointLookup { + conn, + table, + id, + actual, + }, + )?; + } + Ok(()) + } + + pub fn on_predicate_count( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + table: usize, + col: u16, + value: &AlgebraicValue, + actual: usize, + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe( + &ctx, + PropertyEvent::PredicateCount { + conn, + table, + col, + value, + actual, + }, + )?; + } + Ok(()) + } + + #[allow(clippy::too_many_arguments)] + pub fn on_range_scan( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + table: usize, + cols: &[u16], + lower: &Bound, + upper: &Bound, + actual: &[SimRow], + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe( + &ctx, + PropertyEvent::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + }, + )?; + } + Ok(()) + } + + pub fn on_full_scan( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + 
table: usize, + actual: &[SimRow], + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry + .rule + .observe(&ctx, PropertyEvent::FullScan { conn, table, actual })?; + } + Ok(()) + } + + pub fn on_commit_or_rollback(&mut self, access: &dyn TargetPropertyAccess) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe(&ctx, PropertyEvent::CommitOrRollback)?; + } + Ok(()) + } + + pub fn on_dynamic_migration_probe( + &mut self, + access: &dyn TargetPropertyAccess, + probe: &DynamicMigrationProbe, + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe(&ctx, PropertyEvent::DynamicMigrationProbe(probe))?; + } + Ok(()) + } + + pub fn on_durable_replay( + &mut self, + access: &dyn TargetPropertyAccess, + replay: &DurableReplaySummary, + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe(&ctx, PropertyEvent::DurableReplay(replay))?; + } + Ok(()) + } + + pub fn on_table_workload_finish( + &mut self, + access: &dyn TargetPropertyAccess, + outcome: &TableWorkloadOutcome, + ) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry + .rule + .observe(&ctx, PropertyEvent::TableWorkloadFinished(outcome))?; + } + Ok(()) + } + + fn observe_table_observation( + &mut self, + access: &dyn TargetPropertyAccess, + interaction: &TableWorkloadInteraction, + observation: &TableObservation, + ) -> Result<(), String> { + match observation { + TableObservation::Applied => {} + TableObservation::RowInserted { + conn, + table, + row, + in_tx, + } => self.on_insert(access, 0, *conn, *table, row, *in_tx)?, + TableObservation::RowDeleted { + conn, + table, + 
row, + in_tx, + } => self.on_delete(access, 0, *conn, *table, row, *in_tx)?, + TableObservation::ExpectedError(kind) => self.on_expected_error(access, *kind, interaction)?, + TableObservation::PointLookup { + conn, + table, + id, + actual, + } => self.on_point_lookup(access, *conn, *table, *id, actual)?, + TableObservation::PredicateCount { + conn, + table, + col, + value, + actual, + } => self.on_predicate_count(access, *conn, *table, *col, value, *actual)?, + TableObservation::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + } => self.on_range_scan(access, *conn, *table, cols, lower, upper, actual)?, + TableObservation::FullScan { conn, table, actual } => self.on_full_scan(access, *conn, *table, actual)?, + TableObservation::CommitOrRollback => {} + } + + self.on_table_interaction(access, interaction)?; + + if matches!(observation, TableObservation::CommitOrRollback) { + self.on_commit_or_rollback(access)?; + } + Ok(()) + } +} + +impl StreamingProperties for PropertyRuntime +where + E: TargetEngine< + CommitlogInteraction, + Observation = CommitlogObservation, + Outcome = CommitlogWorkloadOutcome, + Error = String, + > + TargetPropertyAccess, +{ + fn observe( + &mut self, + engine: &E, + interaction: &CommitlogInteraction, + observation: &CommitlogObservation, + ) -> Result<(), String> { + match (interaction, observation) { + (CommitlogInteraction::Table(table_interaction), CommitlogObservation::Table(table_observation)) => { + self.observe_table_observation(engine, table_interaction, table_observation) + } + (_, CommitlogObservation::DynamicMigrationProbe(probe)) => self.on_dynamic_migration_probe(engine, probe), + (_, CommitlogObservation::DurableReplay(replay)) => self.on_durable_replay(engine, replay), + (_, CommitlogObservation::Applied | CommitlogObservation::Skipped) => Ok(()), + (other, observation) => Err(format!( + "observation {observation:?} does not match interaction {other:?}" + )), + } + } + + fn finish(&mut self, engine: &E, 
outcome: &CommitlogWorkloadOutcome) -> Result<(), String> { + self.on_durable_replay(engine, &outcome.replay)?; + self.on_table_workload_finish(engine, &outcome.table) + } +} + +struct RuleEntry { + rule: Box, +} + +impl RuleEntry { + fn new(rule: Box) -> Self { + Self { rule } + } +} + +impl Default for PropertyRuntime { + fn default() -> Self { + Self::with_kinds(&[ + PropertyKind::NotCrash, + PropertyKind::InsertSelect, + PropertyKind::DeleteSelect, + PropertyKind::SelectSelectOptimizer, + PropertyKind::WhereTrueFalseNull, + PropertyKind::IndexRangeExcluded, + PropertyKind::BankingTablesMatch, + PropertyKind::DynamicMigrationAutoInc, + PropertyKind::DurableReplayMatchesModel, + PropertyKind::ExpectedErrorMatches, + PropertyKind::PointLookupMatchesModel, + PropertyKind::PredicateCountMatchesModel, + PropertyKind::RangeScanMatchesModel, + PropertyKind::FullScanMatchesModel, + ]) + } +} diff --git a/crates/dst/src/targets/buggified_repo.rs b/crates/dst/src/targets/buggified_repo.rs index e4bc4ed66f1..57796b7d3e9 100644 --- a/crates/dst/src/targets/buggified_repo.rs +++ b/crates/dst/src/targets/buggified_repo.rs @@ -379,9 +379,9 @@ impl FaultController { } else { Duration::from_millis(1) }; - #[cfg(madsim)] + #[cfg(all(simulation, madsim))] madsim::time::advance(latency); - #[cfg(not(madsim))] + #[cfg(not(all(simulation, madsim)))] let _ = latency; } } @@ -412,11 +412,11 @@ impl FaultController { return false; } - #[cfg(madsim)] + #[cfg(simulation)] { madsim::buggify::buggify_with_prob(probability) } - #[cfg(not(madsim))] + #[cfg(not(simulation))] { let _ = probability; false diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 9644011e547..df7a3d637cb 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -1428,7 +1428,7 @@ fn bootstrap_relational_db( (runtime.handle().clone(), Some(runtime)) }; let fault_config = 
CommitlogFaultConfig::for_profile(fault_profile); - configure_madsim_buggify(fault_config.enabled()); + configure_simulation_buggify(fault_config.enabled()); let commitlog_repo = BuggifiedRepo::new(MemoryCommitlogRepo::new(8 * 1024 * 1024), fault_config); let durability_opts = commitlog_stress_options(seed.fork(701)); @@ -1473,8 +1473,8 @@ fn commitlog_stress_options(seed: DstSeed) -> spacetimedb_durability::local::Opt opts } -fn configure_madsim_buggify(enabled: bool) { - #[cfg(madsim)] +fn configure_simulation_buggify(enabled: bool) { + #[cfg(simulation)] { if enabled { madsim::buggify::enable(); @@ -1482,7 +1482,7 @@ fn configure_madsim_buggify(enabled: bool) { madsim::buggify::disable(); } } - #[cfg(not(madsim))] + #[cfg(not(simulation))] let _ = enabled; } diff --git a/crates/dst/tests/madsim_tcp.rs b/crates/dst/tests/madsim_tcp.rs index 06574c43965..d5e587d9fc8 100644 --- a/crates/dst/tests/madsim_tcp.rs +++ b/crates/dst/tests/madsim_tcp.rs @@ -1,4 +1,4 @@ -#![cfg(madsim)] +#![cfg(all(simulation, madsim))] use std::{net::SocketAddr, sync::Arc}; diff --git a/crates/io/build.rs b/crates/io/build.rs new file mode 100644 index 00000000000..3982c077afc --- /dev/null +++ b/crates/io/build.rs @@ -0,0 +1,10 @@ +fn main() { + println!("cargo:rerun-if-env-changed=CARGO_CFG_MADSIM"); + println!("cargo:rerun-if-env-changed=CARGO_CFG_SIMULATION"); + println!("cargo:rerun-if-env-changed=CARGO_ENCODED_RUSTFLAGS"); + println!("cargo:rerun-if-env-changed=RUSTFLAGS"); + + if std::env::var_os("CARGO_CFG_MADSIM").is_some() { + println!("cargo:rustc-cfg=simulation"); + } +} diff --git a/crates/io/src/lib.rs b/crates/io/src/lib.rs index 1640d961cc8..50179e7221f 100644 --- a/crates/io/src/lib.rs +++ b/crates/io/src/lib.rs @@ -1,8 +1,8 @@ //! Narrow facade for SpacetimeDB-owned async IO boundaries. //! //! Production builds use Tokio through the `madsim-tokio` compatibility crate. -//! Builds compiled with `--cfg madsim` use the simulator implementations exposed -//! 
by that same compatibility crate. +//! Simulation builds use the simulator implementations exposed by that same +//! compatibility crate. //! //! This crate is intentionally small. It is a migration point for filesystem and //! network APIs reached by deterministic simulation tests, not a general runtime @@ -11,7 +11,7 @@ pub mod fs { pub use tokio::fs::*; - #[cfg(madsim)] + #[cfg(simulation)] use std::{ io::{self, Read as _}, pin::Pin, @@ -19,34 +19,35 @@ pub mod fs { }; /// Async reader type returned by [`file_from_std`]. - #[cfg(not(madsim))] + #[cfg(not(simulation))] pub type FileFromStd = tokio::fs::File; /// Async reader type returned by [`file_from_std`]. - #[cfg(madsim)] + #[cfg(simulation)] pub type FileFromStd = StdFileAsyncReader; /// Convert a standard file handle into an async reader. /// - /// Tokio supports this directly. The madsim filesystem type does not wrap - /// existing OS files, so madsim builds use a small `AsyncRead` adapter for - /// call sites that only need to stream an already-opened std file. - #[cfg(not(madsim))] + /// Tokio supports this directly. The simulated filesystem type does not + /// wrap existing OS files, so simulation builds use a small `AsyncRead` + /// adapter for call sites that only need to stream an already-opened std + /// file. + #[cfg(not(simulation))] pub fn file_from_std(file: std::fs::File) -> FileFromStd { tokio::fs::File::from_std(file) } /// Convert a standard file handle into an async reader. - #[cfg(madsim)] + #[cfg(simulation)] pub fn file_from_std(file: std::fs::File) -> FileFromStd { StdFileAsyncReader(file) } - /// Async-read adapter for standard files in madsim builds. - #[cfg(madsim)] + /// Async-read adapter for standard files in simulation builds. 
+ #[cfg(simulation)] pub struct StdFileAsyncReader(std::fs::File); - #[cfg(madsim)] + #[cfg(simulation)] impl tokio::io::AsyncRead for StdFileAsyncReader { fn poll_read( mut self: Pin<&mut Self>, diff --git a/crates/standalone/build.rs b/crates/standalone/build.rs new file mode 100644 index 00000000000..3982c077afc --- /dev/null +++ b/crates/standalone/build.rs @@ -0,0 +1,10 @@ +fn main() { + println!("cargo:rerun-if-env-changed=CARGO_CFG_MADSIM"); + println!("cargo:rerun-if-env-changed=CARGO_CFG_SIMULATION"); + println!("cargo:rerun-if-env-changed=CARGO_ENCODED_RUSTFLAGS"); + println!("cargo:rerun-if-env-changed=RUSTFLAGS"); + + if std::env::var_os("CARGO_CFG_MADSIM").is_some() { + println!("cargo:rustc-cfg=simulation"); + } +} diff --git a/crates/standalone/src/subcommands/start.rs b/crates/standalone/src/subcommands/start.rs index e2ee2ca6e43..dc199ced402 100644 --- a/crates/standalone/src/subcommands/start.rs +++ b/crates/standalone/src/subcommands/start.rs @@ -1,13 +1,18 @@ +#[cfg(not(simulation))] use netstat2::{get_sockets_info, AddressFamilyFlags, ProtocolFlags, ProtocolSocketInfo, TcpState}; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::identity::IdentityRoutes; -#[cfg(not(madsim))] +#[cfg(not(simulation))] use spacetimedb_pg::pg_server; +#[cfg(not(simulation))] use std::io::{self, Write}; +#[cfg(not(simulation))] use std::net::IpAddr; use std::sync::Arc; use crate::{StandaloneEnv, StandaloneOptions}; use anyhow::Context; +#[cfg(not(simulation))] use axum::extract::DefaultBodyLimit; use clap::ArgAction::SetTrue; use clap::{Arg, ArgMatches}; @@ -16,12 +21,14 @@ use spacetimedb::db::{self, Storage}; use spacetimedb::startup::{self, TracingOptions}; use spacetimedb::util::jobs::JobCores; use spacetimedb::worker_metrics; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::database::DatabaseRoutes; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::router; use spacetimedb_client_api::routes::subscribe::WebSocketOptions; use 
spacetimedb_paths::cli::{PrivKeyPath, PubKeyPath}; use spacetimedb_paths::server::{ConfigToml, ServerDataDir}; -#[cfg(not(madsim))] +#[cfg(not(simulation))] use tokio::net::TcpListener; pub fn cli() -> clap::Command { @@ -113,6 +120,7 @@ impl ConfigFile { pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { let listen_addr = args.get_one::("listen_addr").unwrap(); let pg_port = args.get_one::("pg_port"); + #[cfg(not(simulation))] let non_interactive = args.get_flag("non_interactive"); let cert_dir = args.get_one::("jwt_key_dir"); let certs = Option::zip( @@ -199,26 +207,26 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { ); worker_metrics::spawn_page_pool_stats(listen_addr.clone(), ctx.page_pool().clone()); worker_metrics::spawn_bsatn_rlb_pool_stats(listen_addr.clone(), ctx.bsatn_rlb_pool().clone()); - #[cfg(madsim)] + #[cfg(simulation)] { let _ = (pg_port, ctx, listen_addr); - anyhow::bail!("standalone start server mode is not supported under madsim"); + anyhow::bail!("standalone start server mode is not supported under simulation"); } - #[cfg(not(madsim))] + #[cfg(not(simulation))] let mut db_routes = DatabaseRoutes::default(); - #[cfg(not(madsim))] + #[cfg(not(simulation))] { db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); } - #[cfg(not(madsim))] + #[cfg(not(simulation))] let extra = axum::Router::new().nest("/health", spacetimedb_client_api::routes::health::router()); - #[cfg(not(madsim))] + #[cfg(not(simulation))] let service = router(&ctx, db_routes, IdentityRoutes::default(), extra).with_state(ctx.clone()); - #[cfg(not(madsim))] + #[cfg(not(simulation))] // Check if the requested port is available on both IPv4 and IPv6. // If not, offer to find an available port by incrementing (unless non-interactive). 
let listen_addr = if let Some((host, port_str)) = listen_addr.rsplit_once(':') { @@ -264,7 +272,7 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { listen_addr.to_string() }; - #[cfg(not(madsim))] + #[cfg(not(simulation))] { let tcp = TcpListener::bind(&listen_addr).await.context(format!( "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" @@ -301,6 +309,7 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { } } + #[cfg(not(simulation))] Ok(()) } @@ -319,6 +328,7 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { /// Note: There is a small race condition between this check and the actual bind - /// another process could grab the port in between. This is unlikely in practice /// and the actual bind will fail with a clear error if it happens. +#[cfg(not(simulation))] pub fn is_port_available(host: &str, port: u16) -> bool { let requested = match parse_host(host) { Some(r) => r, @@ -353,11 +363,13 @@ pub fn is_port_available(host: &str, port: u16) -> bool { } #[derive(Debug, Clone, Copy)] +#[cfg(not(simulation))] enum RequestedHost { Localhost, Ip(IpAddr), } +#[cfg(not(simulation))] fn parse_host(host: &str) -> Option { let host = host.trim(); @@ -371,6 +383,7 @@ fn parse_host(host: &str) -> Option { host.parse::().ok().map(RequestedHost::Ip) } +#[cfg(not(simulation))] fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { match requested { RequestedHost::Localhost => match listener_addr { @@ -441,6 +454,7 @@ fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { /// Find an available port starting from the requested port. /// Returns the first port that is available on both IPv4 and IPv6. 
+#[cfg(not(simulation))] fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Option { for offset in 0..max_attempts { let port = requested_port.saturating_add(offset); @@ -455,6 +469,7 @@ fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Op } /// Prompt the user with a yes/no question. Returns true if they answer yes. +#[cfg(not(simulation))] fn prompt_yes_no(question: &str) -> bool { print!("{} [y/N] ", question); io::stdout().flush().ok(); diff --git a/run_dst.sh b/run_dst.sh index 6dd6bee074d..1a54fec27d4 100755 --- a/run_dst.sh +++ b/run_dst.sh @@ -3,5 +3,8 @@ set -euo pipefail cd "$(dirname "$0")" +# madsim-tokio and madsim still use cfg(madsim). SpacetimeDB crates derive +# cfg(simulation) from it in build.rs so source gates can stay simulator-provider +# neutral. Passing only --cfg simulation leaves madsim in std/Tokio mode. export RUSTFLAGS="${RUSTFLAGS:+$RUSTFLAGS }--cfg madsim" exec cargo run -p spacetimedb-dst -- "$@" From 831e9f2db956157f8df13d6d05edd82646c5d974 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 5 May 2026 01:35:43 +0530 Subject: [PATCH 26/74] improve relatioandb_commit: --- crates/dst/README.md | 45 +- crates/dst/src/lib.rs | 2 +- crates/dst/src/properties.rs | 66 +- crates/dst/src/properties/rules.rs | 129 +++- crates/dst/src/properties/runtime.rs | 217 +++--- crates/dst/src/targets/descriptor.rs | 3 +- .../src/targets/relational_db_commitlog.rs | 700 +++++++++--------- .../src/workload/commitlog_ops/generation.rs | 9 +- .../dst/src/workload/commitlog_ops/types.rs | 3 - .../dst/src/workload/module_ops/generation.rs | 4 +- .../dst/src/workload/table_ops/generation.rs | 11 +- crates/dst/src/workload/table_ops/mod.rs | 4 +- crates/dst/src/workload/table_ops/model.rs | 220 ++++-- .../table_ops/scenarios/random_crud.rs | 13 +- crates/dst/src/workload/table_ops/types.rs | 230 +++--- 15 files changed, 962 insertions(+), 694 deletions(-) diff --git a/crates/dst/README.md 
b/crates/dst/README.md index e12790703e8..d22236d0dc4 100644 --- a/crates/dst/README.md +++ b/crates/dst/README.md @@ -32,13 +32,13 @@ runner pulls one interaction at a time from a source, sends it to the target, and asks the property runtime to observe the result. ```text -CLI -> TargetDescriptor -> NextInteractionSource -> TargetEngine -> Observation - \-> StreamingProperties -> Outcome +CLI -> TargetDescriptor -> WorkloadSource -> TargetEngine -> Observation + \-> StreamingProperties -> Outcome ``` The core contracts are: -- `NextInteractionSource`: deterministic pull-based interaction stream. +- `WorkloadSource`: deterministic pull-based interaction stream. - `TargetEngine`: target-specific execution and outcome collection. - `StreamingProperties`: reusable property checks over observations and target accessors. @@ -89,19 +89,27 @@ Use this rule of thumb: ## Table Operation Semantics -The table workload intentionally distinguishes similar-looking operations: +The table workload keeps the executable operation language small. Similar +cases converge into physical operations such as `InsertRows`, `DeleteRows`, and +`BeginTx`; the generated interaction also carries a case label for coverage and +debug output. -- `ExactDuplicateInsert`: reinserts a full row that is already visible. For - RelationalDB set semantics, this should be an idempotent no-op. -- `UniqueKeyConflictInsert`: inserts a row with an existing primary id but a - different non-key payload. This should fail with `UniqueConstraintViolation`. -- `DeleteMissing`: deleting an absent row should report no mutation. -- `BeginTxConflict` / `WriteConflictInsert`: expected write-lock failures. -- Query operations (`PointLookup`, `PredicateCount`, `RangeScan`, `FullScan`) - are metamorphic/model oracles, not mutations. +Correctness does not come from that label. 
The property runtime asks its model +what the physical operation should do: -Keeping these cases separate matters: an exact duplicate and a unique-key -conflict exercise different datastore semantics. +- inserting fresh rows should mutate the table +- inserting an exact visible row should be an idempotent no-op +- inserting an existing primary id with a different payload should report a + unique-key error +- deleting visible rows should mutate the table +- deleting absent rows should report a missing-row error +- beginning or writing behind another writer should report a write conflict +- query operations (`PointLookup`, `PredicateCount`, `RangeScan`, `FullScan`) + should match the model-visible state + +The case label still matters for summaries. It lets a run report that it hit +`ExactDuplicateInsert` or `UniqueKeyConflictInsert`, without teaching the target +or properties to trust generator-provided expectations. ## Current Targets @@ -123,14 +131,15 @@ storage internals. Current property families include: - insert/select and delete/select checks -- expected error matching -- point lookup, predicate count, range scan, and full scan vs `ExpectedModel` +- observed error vs model-predicted error matching +- model-predicted no-op checks +- point lookup, predicate count, range scan, and full scan vs the table oracle - NoREC-style optimizer-vs-direct checks - TLP-style true/false/null partition checks - index range exclusion checks - banking mirror-table invariants - dynamic migration auto-increment checks -- durable replay state vs the expected committed model +- durable replay state vs the oracle committed model ## Fault Injection @@ -203,7 +212,7 @@ Start here: - `src/workload/table_ops`: table interaction language, generation model, and scenarios. - `src/workload/commitlog_ops`: lifecycle layer over table workloads. -- `src/properties.rs`: property catalog and expected model checks. +- `src/properties.rs`: property catalog and oracle/model checks. 
- `src/targets/relational_db_commitlog.rs`: target adapter for RelationalDB, commitlog durability, fault injection, close/reopen, and replay. - `src/targets/buggified_repo.rs`: deterministic disk-like fault layer. diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index c8e29d0a5c2..92c3afb97e4 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -45,7 +45,7 @@ pub mod client; pub mod config; /// Core traits/runners for pluggable workloads and targets. pub mod core; -/// Reusable semantic properties and expected-model checks. +/// Reusable semantic properties and oracle-model checks. pub(crate) mod properties; mod schema; /// Stable seed and RNG utilities used to make runs reproducible. diff --git a/crates/dst/src/properties.rs b/crates/dst/src/properties.rs index 0e817894912..136c96ac550 100644 --- a/crates/dst/src/properties.rs +++ b/crates/dst/src/properties.rs @@ -3,21 +3,21 @@ //! This module is the boundary between target execution and semantic checking. //! Targets emit observations and implement [`TargetPropertyAccess`]; property //! rules compare those observations against either the target's externally -//! visible state, an expected model, or durable replay state. +//! visible state, an oracle model, or durable replay state. //! //! ## Property Model //! //! A property is a named check over a run. It observes generated interactions, -//! target observations, target-visible state, expected models, and final +//! target observations, target-visible state, oracle models, and final //! outcomes. Failures should include a stable property name and enough context //! to replay the seed or trace. //! //! The current catalog is intentionally small and falls into the same groups //! used by the proposal: //! -//! - Safety properties: `NotCrash`, `ExpectedErrorMatches`, -//! `DurableReplayMatchesModel`, `BankingTablesMatch`, and -//! `DynamicMigrationAutoInc`. +//! - Safety properties: `NotCrash`, `ErrorMatchesOracle`, +//! 
`NoMutationMatchesModel`, `DurableReplayMatchesModel`, +//! `BankingTablesMatch`, and `DynamicMigrationAutoInc`. //! - Model/oracle properties: `PointLookupMatchesModel`, //! `PredicateCountMatchesModel`, `RangeScanMatchesModel`, //! `FullScanMatchesModel`, and the scenario-specific final table-state check. @@ -39,7 +39,7 @@ use crate::{ schema::{SchemaPlan, SimRow}, workload::{ commitlog_ops::DurableReplaySummary, - table_ops::{ExpectedErrorKind, TableWorkloadInteraction, TableWorkloadOutcome}, + table_ops::{TableErrorKind, TableWorkloadInteraction, TableWorkloadOutcome}, }, }; @@ -49,6 +49,7 @@ pub(crate) use runtime::PropertyRuntime; pub(crate) trait TargetPropertyAccess { fn schema_plan(&self) -> &SchemaPlan; fn lookup_in_connection(&self, conn: SessionId, table: usize, id: u64) -> Result, String>; + fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String>; fn collect_rows_for_table(&self, table: usize) -> Result, String>; fn count_rows(&self, table: usize) -> Result; fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result; @@ -82,17 +83,19 @@ pub(crate) enum PropertyKind { BankingTablesMatch, /// Safety: auto-increment IDs continue advancing after dynamic table migration. DynamicMigrationAutoInc, - /// Safety: durable replay state equals the expected committed model. + /// Safety: durable replay state equals the oracle committed model. DurableReplayMatchesModel, - /// Safety: expected-error interactions fail with the expected error class. - ExpectedErrorMatches, - /// Model/oracle: point lookups match the expected session-visible model. + /// Safety: observed errors match the model-predicted error class. + ErrorMatchesOracle, + /// Safety: model-predicted no-op interactions do not mutate visible state. + NoMutationMatchesModel, + /// Model/oracle: point lookups match the oracle session-visible model. PointLookupMatchesModel, - /// Model/oracle: predicate counts match the expected session-visible model. 
+ /// Model/oracle: predicate counts match the oracle session-visible model. PredicateCountMatchesModel, - /// Model/oracle: range scans match the expected session-visible model. + /// Model/oracle: range scans match the oracle session-visible model. RangeScanMatchesModel, - /// Model/oracle: full scans match the expected session-visible model. + /// Model/oracle: full scans match the oracle session-visible model. FullScanMatchesModel, } @@ -106,21 +109,27 @@ pub(crate) struct DynamicMigrationProbe { } #[derive(Clone, Debug)] -pub(crate) enum TableObservation { - Applied, - RowInserted { - conn: SessionId, +pub(crate) enum TableMutation { + Inserted { table: usize, - row: SimRow, - in_tx: bool, + requested: SimRow, + returned: SimRow, }, - RowDeleted { - conn: SessionId, + Deleted { table: usize, row: SimRow, + }, +} + +#[derive(Clone, Debug)] +pub(crate) enum TableObservation { + Applied, + Mutated { + conn: SessionId, + mutations: Vec, in_tx: bool, }, - ExpectedError(ExpectedErrorKind), + ObservedError(TableErrorKind), PointLookup { conn: SessionId, table: usize, @@ -170,7 +179,7 @@ enum PropertyEvent<'a> { RowInserted { conn: SessionId, table: usize, - row: &'a SimRow, + returned: &'a SimRow, in_tx: bool, }, RowDeleted { @@ -179,9 +188,16 @@ enum PropertyEvent<'a> { row: &'a SimRow, in_tx: bool, }, - ExpectedError { - kind: ExpectedErrorKind, + ObservedError { + observed: TableErrorKind, + predicted: TableErrorKind, + subject: Option<(SessionId, usize)>, + interaction: &'a TableWorkloadInteraction, + }, + NoMutation { + subject: Option<(SessionId, usize)>, interaction: &'a TableWorkloadInteraction, + observation: &'a TableObservation, }, PointLookup { conn: SessionId, diff --git a/crates/dst/src/properties/rules.rs b/crates/dst/src/properties/rules.rs index 9e5a13f9fdd..95acd563ad8 100644 --- a/crates/dst/src/properties/rules.rs +++ b/crates/dst/src/properties/rules.rs @@ -3,11 +3,12 @@ use std::ops::Bound; use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; 
use crate::{ + client::SessionId, schema::{SchemaPlan, SimRow}, - workload::table_ops::{ExpectedResult, TableScenario}, + workload::table_ops::{TableOperation, TableScenario}, }; -use super::{PropertyContext, PropertyEvent, PropertyKind, TargetPropertyAccess}; +use super::{PropertyContext, PropertyEvent, PropertyKind, TableMutation, TableObservation, TargetPropertyAccess}; pub(super) trait PropertyRule { fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { @@ -28,7 +29,8 @@ pub(super) fn rule_for_kind(kind: PropertyKind) -> Box { PropertyKind::BankingTablesMatch => Box::::default(), PropertyKind::DynamicMigrationAutoInc => Box::::default(), PropertyKind::DurableReplayMatchesModel => Box::::default(), - PropertyKind::ExpectedErrorMatches => Box::::default(), + PropertyKind::ErrorMatchesOracle => Box::::default(), + PropertyKind::NoMutationMatchesModel => Box::::default(), PropertyKind::PointLookupMatchesModel => Box::::default(), PropertyKind::PredicateCountMatchesModel => Box::::default(), PropertyKind::RangeScanMatchesModel => Box::::default(), @@ -36,11 +38,11 @@ pub(super) fn rule_for_kind(kind: PropertyKind) -> Box { } } -pub(super) fn expected_table_state_rule(scenario: S, schema: SchemaPlan) -> Box +pub(super) fn oracle_table_state_rule(scenario: S, schema: SchemaPlan) -> Box where S: TableScenario + 'static, { - Box::new(ExpectedTableStateRule::new(scenario, schema)) + Box::new(OracleTableStateRule::new(scenario, schema)) } #[derive(Default)] @@ -48,31 +50,31 @@ struct NotCrashRule; impl PropertyRule for NotCrashRule {} -struct ExpectedTableStateRule { +struct OracleTableStateRule { scenario: S, schema: SchemaPlan, } -impl ExpectedTableStateRule { +impl OracleTableStateRule { fn new(scenario: S, schema: SchemaPlan) -> Self { Self { scenario, schema } } } -impl PropertyRule for ExpectedTableStateRule { +impl PropertyRule for OracleTableStateRule { fn observe(&mut self, ctx: &PropertyContext<'_>, event: 
PropertyEvent<'_>) -> Result<(), String> { match event { PropertyEvent::TableWorkloadFinished(outcome) => { let expected_rows = ctx.models.table().committed_rows(); if outcome.final_rows != expected_rows { return Err(format!( - "[ExpectedTableState] final table state mismatch: expected={expected_rows:?} actual={:?}", + "[OracleTableState] final table state mismatch: expected={expected_rows:?} actual={:?}", outcome.final_rows )); } self.scenario .validate_outcome(&self.schema, outcome) - .map_err(|err| format!("[ExpectedTableState] scenario invariant failed: {err}")) + .map_err(|err| format!("[OracleTableState] scenario invariant failed: {err}")) } _ => Ok(()), } @@ -84,14 +86,17 @@ struct InsertSelectRule; impl PropertyRule for InsertSelectRule { fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::RowInserted { conn, table, row, .. } = event else { + let PropertyEvent::RowInserted { + conn, table, returned, .. + } = event + else { return Ok(()); }; - let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + let id = returned.id().ok_or_else(|| "row missing id column".to_string())?; let found = ctx.access.lookup_in_connection(conn, table, id)?; - if found != Some(row.clone()) { + if found != Some(returned.clone()) { return Err(format!( - "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={row:?}, actual={found:?}" + "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={returned:?}, actual={found:?}" )); } Ok(()) @@ -351,22 +356,96 @@ impl PropertyRule for DurableReplayMatchesModelRule { } #[derive(Default)] -struct ExpectedErrorMatchesRule; +struct ErrorMatchesOracleRule; -impl PropertyRule for ExpectedErrorMatchesRule { - fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::ExpectedError { kind, interaction } = event else { +impl PropertyRule 
for ErrorMatchesOracleRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::ObservedError { + observed, + predicted, + subject, + interaction, + } = event + else { + return Ok(()); + }; + if observed != predicted { + return Err(format!( + "[ErrorMatchesOracle] observed {observed:?}, but model predicted {predicted:?}: {interaction:?}", + )); + } + if let Some((conn, table)) = subject { + assert_visible_rows_match_model(ctx, conn, table, "[ErrorDoesNotMutate]", interaction)?; + } + Ok(()) + } +} + +#[derive(Default)] +struct NoMutationMatchesModelRule; + +impl PropertyRule for NoMutationMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::NoMutation { + interaction, + subject, + observation, + } = event + else { return Ok(()); }; - if interaction.expected == ExpectedResult::Err(kind) { - Ok(()) - } else { - Err(format!( - "[ExpectedErrorMatches] observed {kind:?}, but interaction expected {:?}: {interaction:?}", - interaction.expected - )) + if let TableOperation::InsertRows { table, rows, .. } = &interaction.op + && let TableObservation::Mutated { mutations, .. 
} = observation + { + if mutations.len() != rows.len() { + return Err(format!( + "[NoMutationMatchesModel] insert no-op returned wrong mutation count: expected={}, actual={}; interaction={interaction:?}", + rows.len(), + mutations.len() + )); + } + for (row, mutation) in rows.iter().zip(mutations) { + let TableMutation::Inserted { + table: observed_table, + requested, + returned, + } = mutation + else { + return Err(format!( + "[NoMutationMatchesModel] insert no-op returned non-insert mutation: {mutation:?}; interaction={interaction:?}" + )); + }; + if observed_table != table || requested != row || returned != row { + return Err(format!( + "[NoMutationMatchesModel] no-op insert returned row mismatch: expected table={table}, row={row:?}; observed table={observed_table}, requested={requested:?}, returned={returned:?}; interaction={interaction:?}" + )); + } + } + } + + if let Some((conn, table)) = subject { + assert_visible_rows_match_model(ctx, conn, table, "[NoMutationMatchesModel]", interaction)?; } + Ok(()) + } +} + +fn assert_visible_rows_match_model( + ctx: &PropertyContext<'_>, + conn: SessionId, + table: usize, + property: &str, + interaction: &crate::workload::table_ops::TableWorkloadInteraction, +) -> Result<(), String> { + let mut actual = ctx.access.collect_rows_in_connection(conn, table)?; + actual.sort_by_key(|row| row.id().unwrap_or_default()); + let expected = ctx.models.table().visible_rows(conn, table); + if actual != expected { + return Err(format!( + "{property} visible rows changed unexpectedly on conn={conn}, table={table}: expected={expected:?}, actual={actual:?}; interaction={interaction:?}" + )); } + Ok(()) } #[derive(Default)] diff --git a/crates/dst/src/properties/runtime.rs b/crates/dst/src/properties/runtime.rs index e7224b4df5c..d42dce8e467 100644 --- a/crates/dst/src/properties/runtime.rs +++ b/crates/dst/src/properties/runtime.rs @@ -9,16 +9,16 @@ use crate::{ workload::{ commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, 
DurableReplaySummary}, table_ops::{ - ExpectedErrorKind, ExpectedModel, ExpectedResult, TableOperation, TableScenario, TableWorkloadInteraction, + PredictedOutcome, TableErrorKind, TableOracle, TableScenario, TableWorkloadInteraction, TableWorkloadOutcome, }, }, }; use super::{ - rules::{expected_table_state_rule, rule_for_kind, PropertyRule}, - CommitlogObservation, DynamicMigrationProbe, PropertyContext, PropertyEvent, PropertyKind, TableObservation, - TargetPropertyAccess, + rules::{oracle_table_state_rule, rule_for_kind, PropertyRule}, + CommitlogObservation, DynamicMigrationProbe, PropertyContext, PropertyEvent, PropertyKind, TableMutation, + TableObservation, TargetPropertyAccess, }; #[derive(Clone, Debug)] @@ -28,14 +28,14 @@ pub(super) struct PropertyModels { #[derive(Clone, Debug)] pub(super) struct TableModel { - expected: ExpectedModel, + oracle: TableOracle, } impl PropertyModels { pub(super) fn new(table_count: usize, num_connections: usize) -> Self { Self { table: TableModel { - expected: ExpectedModel::new(table_count, num_connections), + oracle: TableOracle::new(table_count, num_connections), }, } } @@ -44,22 +44,26 @@ impl PropertyModels { &self.table } + fn predict(&self, interaction: &TableWorkloadInteraction) -> Result { + self.table.oracle.predict(&interaction.op) + } + fn apply(&mut self, interaction: &TableWorkloadInteraction) { - self.table.expected.apply(interaction); + self.table.oracle.apply(&interaction.op); } } impl TableModel { pub(super) fn committed_rows(&self) -> Vec> { - self.expected.clone().committed_rows() + self.oracle.clone().committed_rows() } pub(super) fn lookup_by_id(&self, conn: SessionId, table: usize, id: u64) -> Option { - self.expected.lookup_by_id(conn, table, id) + self.oracle.lookup_by_id(conn, table, id) } pub(super) fn predicate_count(&self, conn: SessionId, table: usize, col: u16, value: &AlgebraicValue) -> usize { - self.expected.predicate_count(conn, table, col, value) + self.oracle.predicate_count(conn, 
table, col, value) } pub(super) fn range_scan( @@ -70,11 +74,17 @@ impl TableModel { lower: &Bound, upper: &Bound, ) -> Vec { - self.expected.range_scan(conn, table, cols, lower, upper) + self.oracle.range_scan(conn, table, cols, lower, upper) } pub(super) fn full_scan(&self, conn: SessionId, table: usize) -> Vec { - let mut rows = self.expected.visible_rows(conn, table); + let mut rows = self.oracle.visible_rows(conn, table); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + rows + } + + pub(super) fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { + let mut rows = self.oracle.visible_rows(conn, table); rows.sort_by_key(|row| row.id().unwrap_or_default()); rows } @@ -105,7 +115,7 @@ impl PropertyRuntime { }; runtime .rules - .push(RuleEntry::new(expected_table_state_rule(scenario, schema))); + .push(RuleEntry::new(oracle_table_state_rule(scenario, schema))); runtime } @@ -114,29 +124,7 @@ impl PropertyRuntime { access: &dyn TargetPropertyAccess, interaction: &TableWorkloadInteraction, ) -> Result<(), String> { - match &interaction.op { - TableOperation::BeginTx { .. } - | TableOperation::CommitTx { .. } - | TableOperation::RollbackTx { .. } - | TableOperation::BeginReadTx { .. } - | TableOperation::ReleaseReadTx { .. } => self.models.apply(interaction), - TableOperation::BatchInsert { .. } - | TableOperation::BatchDelete { .. } - | TableOperation::Reinsert { .. } - | TableOperation::AddColumn { .. } - | TableOperation::AddIndex { .. } => self.models.apply(interaction), - TableOperation::Insert { .. } - | TableOperation::Delete { .. } - | TableOperation::BeginTxConflict { .. } - | TableOperation::WriteConflictInsert { .. } - | TableOperation::ExactDuplicateInsert { .. } - | TableOperation::UniqueKeyConflictInsert { .. } - | TableOperation::DeleteMissing { .. } - | TableOperation::PointLookup { .. } - | TableOperation::PredicateCount { .. } - | TableOperation::RangeScan { .. } - | TableOperation::FullScan { .. 
} => {} - } + self.models.apply(interaction); let ctx = PropertyContext { access, models: &self.models, @@ -147,46 +135,63 @@ impl PropertyRuntime { Ok(()) } - pub fn on_insert( + pub fn on_mutations( &mut self, access: &dyn TargetPropertyAccess, - _step: u64, conn: SessionId, - table: usize, - row: &SimRow, + mutations: &[TableMutation], in_tx: bool, ) -> Result<(), String> { - self.models - .apply(&TableWorkloadInteraction::insert(conn, table, row.clone())); let ctx = PropertyContext { access, models: &self.models, }; - for entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::RowInserted { - conn, + + for mutation in mutations { + match mutation { + TableMutation::Inserted { table, - row, - in_tx, - }, - )?; + requested: _, + returned, + } => { + for entry in &mut self.rules { + entry.rule.observe( + &ctx, + PropertyEvent::RowInserted { + conn, + table: *table, + returned, + in_tx, + }, + )?; + } + } + TableMutation::Deleted { table, row } => { + for entry in &mut self.rules { + entry.rule.observe( + &ctx, + PropertyEvent::RowDeleted { + conn, + table: *table, + row, + in_tx, + }, + )?; + } + } + } } Ok(()) } - pub fn on_delete( + pub fn on_observed_error( &mut self, access: &dyn TargetPropertyAccess, - _step: u64, - conn: SessionId, - table: usize, - row: &SimRow, - in_tx: bool, + observed: TableErrorKind, + predicted: TableErrorKind, + subject: Option<(SessionId, usize)>, + interaction: &TableWorkloadInteraction, ) -> Result<(), String> { - self.models - .apply(&TableWorkloadInteraction::delete(conn, table, row.clone())); let ctx = PropertyContext { access, models: &self.models, @@ -194,37 +199,37 @@ impl PropertyRuntime { for entry in &mut self.rules { entry.rule.observe( &ctx, - PropertyEvent::RowDeleted { - conn, - table, - row, - in_tx, + PropertyEvent::ObservedError { + observed, + predicted, + subject, + interaction, }, )?; } Ok(()) } - pub fn on_expected_error( + pub fn on_no_mutation( &mut self, access: &dyn TargetPropertyAccess, - 
kind: ExpectedErrorKind, + subject: Option<(SessionId, usize)>, interaction: &TableWorkloadInteraction, + observation: &TableObservation, ) -> Result<(), String> { - if interaction.expected != ExpectedResult::Err(kind) { - return Err(format!( - "[ExpectedErrorMatches] expected {:?}, observed {kind:?} for {interaction:?}", - interaction.expected - )); - } let ctx = PropertyContext { access, models: &self.models, }; for entry in &mut self.rules { - entry - .rule - .observe(&ctx, PropertyEvent::ExpectedError { kind, interaction })?; + entry.rule.observe( + &ctx, + PropertyEvent::NoMutation { + subject, + interaction, + observation, + }, + )?; } Ok(()) } @@ -397,21 +402,39 @@ impl PropertyRuntime { interaction: &TableWorkloadInteraction, observation: &TableObservation, ) -> Result<(), String> { + let prediction = self.models.predict(interaction)?; + match (&prediction, observed_error_kind(observation)) { + (PredictedOutcome::Error { kind, subject }, Some(observed)) => { + self.on_observed_error(access, observed, *kind, *subject, interaction)?; + return Ok(()); + } + (PredictedOutcome::Error { kind, .. 
}, None) => { + return Err(format!( + "[ErrorMatchesOracle] expected {kind:?}, observed successful result {observation:?} for {interaction:?}" + )); + } + (PredictedOutcome::Applied, Some(observed)) => { + return Err(format!( + "[ErrorMatchesOracle] expected success, observed {observed:?} for {interaction:?}" + )); + } + (PredictedOutcome::Applied, None) => self.on_table_interaction(access, interaction)?, + (PredictedOutcome::NoMutation { subject: _ }, Some(observed)) => { + return Err(format!( + "[NoMutationMatchesModel] expected no mutation, observed {observed:?} for {interaction:?}" + )); + } + (PredictedOutcome::NoMutation { subject }, None) => { + self.on_no_mutation(access, *subject, interaction, observation)?; + } + } + match observation { TableObservation::Applied => {} - TableObservation::RowInserted { - conn, - table, - row, - in_tx, - } => self.on_insert(access, 0, *conn, *table, row, *in_tx)?, - TableObservation::RowDeleted { - conn, - table, - row, - in_tx, - } => self.on_delete(access, 0, *conn, *table, row, *in_tx)?, - TableObservation::ExpectedError(kind) => self.on_expected_error(access, *kind, interaction)?, + TableObservation::Mutated { conn, mutations, in_tx } => { + self.on_mutations(access, *conn, mutations, *in_tx)? 
+ } + TableObservation::ObservedError(_) => {} TableObservation::PointLookup { conn, table, @@ -437,8 +460,6 @@ impl PropertyRuntime { TableObservation::CommitOrRollback => {} } - self.on_table_interaction(access, interaction)?; - if matches!(observation, TableObservation::CommitOrRollback) { self.on_commit_or_rollback(access)?; } @@ -502,7 +523,8 @@ impl Default for PropertyRuntime { PropertyKind::BankingTablesMatch, PropertyKind::DynamicMigrationAutoInc, PropertyKind::DurableReplayMatchesModel, - PropertyKind::ExpectedErrorMatches, + PropertyKind::ErrorMatchesOracle, + PropertyKind::NoMutationMatchesModel, PropertyKind::PointLookupMatchesModel, PropertyKind::PredicateCountMatchesModel, PropertyKind::RangeScanMatchesModel, @@ -510,3 +532,16 @@ impl Default for PropertyRuntime { ]) } } + +fn observed_error_kind(observation: &TableObservation) -> Option { + match observation { + TableObservation::ObservedError(kind) => Some(*kind), + TableObservation::Applied + | TableObservation::Mutated { .. } + | TableObservation::PointLookup { .. } + | TableObservation::PredicateCount { .. } + | TableObservation::RangeScan { .. } + | TableObservation::FullScan { .. 
} + | TableObservation::CommitOrRollback => None, + } +} diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index 39c4f29d7ab..b1dca7d2fdb 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -55,7 +55,7 @@ fn format_relational_db_commitlog_outcome( "\n", "schema: tables={} columns={} max_columns={} indexes={} extra_indexes={}\n", "durability: durable_commits={} replay_tables={}\n", - "interactions: table={} creates={} drops={} migrates={} syncs={} reopens={} reopen_skipped={} skipped={}\n", + "interactions: table={} creates={} drops={} migrates={} reopens={} reopen_skipped={} skipped={}\n", "table_ops:\n", " tx_control: begin={} commit={} rollback={} begin_read={} release_read={} begin_conflict={} write_conflict={}\n", " writes: insert={} delete={} exact_dup={} unique_conflict={} missing_delete={} batch_insert={} batch_delete={} reinsert={}\n", @@ -79,7 +79,6 @@ fn format_relational_db_commitlog_outcome( outcome.interactions.create_dynamic_table, outcome.interactions.drop_dynamic_table, outcome.interactions.migrate_dynamic_table, - outcome.interactions.chaos_sync, outcome.interactions.close_reopen_applied, outcome.interactions.close_reopen_skipped, outcome.interactions.skipped, diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index df7a3d637cb..100c65ea55f 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -32,7 +32,8 @@ use crate::{ config::{CommitlogFaultProfile, RunConfig}, core::{self, TargetEngine}, properties::{ - CommitlogObservation, DynamicMigrationProbe, PropertyRuntime, TableObservation, TargetPropertyAccess, + CommitlogObservation, DynamicMigrationProbe, PropertyRuntime, TableMutation, TableObservation, + TargetPropertyAccess, }, schema::{SchemaPlan, SimRow}, seed::DstSeed, @@ -41,7 +42,7 @@ use crate::{ 
commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary}, commitlog_ops::{InteractionSummary, RuntimeSummary, SchemaSummary, TableOperationSummary, TransactionSummary}, table_ops::{ - ConnectionWriteState, ExpectedErrorKind, TableOperation, TableScenario, TableScenarioId, + ConnectionWriteState, TableErrorKind, TableInteractionCase, TableOperation, TableScenario, TableScenarioId, TableWorkloadInteraction, TableWorkloadOutcome, }, }, @@ -128,7 +129,6 @@ impl RunStats { CommitlogInteraction::CreateDynamicTable { .. } => self.interactions.create_dynamic_table += 1, CommitlogInteraction::DropDynamicTable { .. } => self.interactions.drop_dynamic_table += 1, CommitlogInteraction::MigrateDynamicTable { .. } => self.interactions.migrate_dynamic_table += 1, - CommitlogInteraction::ChaosSync => self.interactions.chaos_sync += 1, CommitlogInteraction::CloseReopen => self.interactions.close_reopen_requested += 1, } } @@ -148,29 +148,29 @@ impl RunStats { } } - fn record_table_operation(&mut self, op: &TableOperation) { - match op { - TableOperation::BeginTx { .. } => self.table_ops.begin_tx += 1, - TableOperation::CommitTx { .. } => self.table_ops.commit_tx += 1, - TableOperation::RollbackTx { .. } => self.table_ops.rollback_tx += 1, - TableOperation::BeginReadTx { .. } => self.table_ops.begin_read_tx += 1, - TableOperation::ReleaseReadTx { .. } => self.table_ops.release_read_tx += 1, - TableOperation::BeginTxConflict { .. } => self.table_ops.begin_tx_conflict += 1, - TableOperation::WriteConflictInsert { .. } => self.table_ops.write_conflict_insert += 1, - TableOperation::Insert { .. } => self.table_ops.insert += 1, - TableOperation::Delete { .. } => self.table_ops.delete += 1, - TableOperation::ExactDuplicateInsert { .. } => self.table_ops.exact_duplicate_insert += 1, - TableOperation::UniqueKeyConflictInsert { .. } => self.table_ops.unique_key_conflict_insert += 1, - TableOperation::DeleteMissing { .. 
} => self.table_ops.delete_missing += 1, - TableOperation::BatchInsert { .. } => self.table_ops.batch_insert += 1, - TableOperation::BatchDelete { .. } => self.table_ops.batch_delete += 1, - TableOperation::Reinsert { .. } => self.table_ops.reinsert += 1, - TableOperation::AddColumn { .. } => self.table_ops.add_column += 1, - TableOperation::AddIndex { .. } => self.table_ops.add_index += 1, - TableOperation::PointLookup { .. } => self.table_ops.point_lookup += 1, - TableOperation::PredicateCount { .. } => self.table_ops.predicate_count += 1, - TableOperation::RangeScan { .. } => self.table_ops.range_scan += 1, - TableOperation::FullScan { .. } => self.table_ops.full_scan += 1, + fn record_table_operation(&mut self, case: TableInteractionCase) { + match case { + TableInteractionCase::BeginTx => self.table_ops.begin_tx += 1, + TableInteractionCase::CommitTx => self.table_ops.commit_tx += 1, + TableInteractionCase::RollbackTx => self.table_ops.rollback_tx += 1, + TableInteractionCase::BeginReadTx => self.table_ops.begin_read_tx += 1, + TableInteractionCase::ReleaseReadTx => self.table_ops.release_read_tx += 1, + TableInteractionCase::BeginTxConflict => self.table_ops.begin_tx_conflict += 1, + TableInteractionCase::WriteConflictInsert => self.table_ops.write_conflict_insert += 1, + TableInteractionCase::Insert => self.table_ops.insert += 1, + TableInteractionCase::Delete => self.table_ops.delete += 1, + TableInteractionCase::ExactDuplicateInsert => self.table_ops.exact_duplicate_insert += 1, + TableInteractionCase::UniqueKeyConflictInsert => self.table_ops.unique_key_conflict_insert += 1, + TableInteractionCase::DeleteMissing => self.table_ops.delete_missing += 1, + TableInteractionCase::BatchInsert => self.table_ops.batch_insert += 1, + TableInteractionCase::BatchDelete => self.table_ops.batch_delete += 1, + TableInteractionCase::Reinsert => self.table_ops.reinsert += 1, + TableInteractionCase::AddColumn => self.table_ops.add_column += 1, + 
TableInteractionCase::AddIndex => self.table_ops.add_index += 1, + TableInteractionCase::PointLookup => self.table_ops.point_lookup += 1, + TableInteractionCase::PredicateCount => self.table_ops.predicate_count += 1, + TableInteractionCase::RangeScan => self.table_ops.range_scan += 1, + TableInteractionCase::FullScan => self.table_ops.full_scan += 1, } } @@ -315,17 +315,15 @@ impl RelationalDbEngine { async fn execute(&mut self, interaction: &CommitlogInteraction) -> Result { self.step = self.step.saturating_add(1); self.stats.record_interaction_requested(interaction); - let force_sync_after = matches!(interaction, CommitlogInteraction::ChaosSync); let observation = match interaction { CommitlogInteraction::Table(op) => self.execute_table_op(op).map(CommitlogObservation::Table), CommitlogInteraction::CreateDynamicTable { conn, slot } => self.create_dynamic_table(*conn, *slot), CommitlogInteraction::DropDynamicTable { conn, slot } => self.drop_dynamic_table(*conn, *slot), CommitlogInteraction::MigrateDynamicTable { conn, slot } => self.migrate_dynamic_table(*conn, *slot), - CommitlogInteraction::ChaosSync => Ok(CommitlogObservation::Applied), CommitlogInteraction::CloseReopen => self.close_and_reopen().await, }?; if !matches!(interaction, CommitlogInteraction::CloseReopen) { - self.wait_for_requested_durability(force_sync_after).await?; + self.wait_for_requested_durability(false).await?; } self.stats.record_interaction_result(interaction, &observation); Ok(observation) @@ -449,35 +447,15 @@ impl RelationalDbEngine { } fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result { - let observation = self.execute_table_op_inner(interaction)?; - self.stats.record_table_operation(&interaction.op); + let observation = self.execute_table_op_inner(&interaction.op)?; + self.stats.record_table_operation(interaction.case); Ok(observation) } - fn execute_table_op_inner(&mut self, interaction: &TableWorkloadInteraction) -> Result { - trace!(step = 
self.step, ?interaction, "table interaction"); - match &interaction.op { - TableOperation::BeginTx { conn } => { - self.execution.ensure_known_connection(*conn)?; - if self.read_tx_by_connection[conn.as_index()].is_some() { - return Err(format!("connection {conn} already has open read transaction")); - } - if self.execution.tx_by_connection[conn.as_index()].is_some() { - return Err(format!("connection {conn} already has open transaction")); - } - if let Some(owner) = self.execution.active_writer { - return Err(format!( - "connection {conn} cannot begin write transaction while connection {owner} owns lock" - )); - } - self.execution.tx_by_connection[conn.as_index()] = Some( - self.db()? - .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests), - ); - self.execution.active_writer = Some(*conn); - self.stats.transactions.explicit_begin += 1; - Ok(TableObservation::Applied) - } + fn execute_table_op_inner(&mut self, op: &TableOperation) -> Result { + trace!(step = self.step, ?op, "table interaction"); + match op { + TableOperation::BeginTx { conn } => self.begin_write_tx(*conn), TableOperation::BeginReadTx { conn } => { self.execution.ensure_known_connection(*conn)?; if self.execution.tx_by_connection[conn.as_index()].is_some() { @@ -499,33 +477,6 @@ impl RelationalDbEngine { let _ = self.db()?.release_tx(tx); Ok(TableObservation::Applied) } - TableOperation::BeginTxConflict { owner, conn } => { - self.expect_write_conflict(*owner, *conn)?; - Ok(TableObservation::ExpectedError(ExpectedErrorKind::WriteConflict)) - } - TableOperation::WriteConflictInsert { - owner, - conn, - table, - row, - } => { - self.expect_write_conflict(*owner, *conn)?; - let err = self - .with_mut_tx(*conn, |engine, tx| { - let table_id = engine.table_id_for_index(*table)?; - let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; - engine - .db()? 
- .insert(tx, table_id, &bsatn) - .map_err(|err| format!("conflicting insert unexpectedly reached datastore: {err}"))?; - Ok(()) - }) - .expect_err("active writer should reject conflicting auto-commit write"); - if !err.contains("owns lock") { - return Err(format!("write conflict returned wrong error: {err}")); - } - Ok(TableObservation::ExpectedError(ExpectedErrorKind::WriteConflict)) - } TableOperation::CommitTx { conn } => { self.execution.ensure_writer_owner(*conn, "commit")?; let tx = self.execution.tx_by_connection[conn.as_index()] @@ -550,184 +501,8 @@ impl RelationalDbEngine { self.stats.transactions.explicit_rollback += 1; Ok(TableObservation::CommitOrRollback) } - TableOperation::Insert { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); - let inserted_row = self.with_mut_tx(*conn, |engine, tx| { - let table_id = *engine - .base_table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; - let (_, row_ref, _) = engine - .db()? 
- .insert(tx, table_id, &bsatn) - .map_err(|err| format!("insert failed: {err}"))?; - Ok(SimRow::from_product_value(row_ref.to_product_value())) - })?; - if !in_tx { - self.refresh_observed_durable_offset(false)?; - } - Ok(TableObservation::RowInserted { - conn: *conn, - table: *table, - row: inserted_row, - in_tx, - }) - } - TableOperation::Delete { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); - self.with_mut_tx(*conn, |engine, tx| { - let table_id = *engine - .base_table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let deleted = engine.db()?.delete_by_rel(tx, table_id, [row.to_product_value()]); - if deleted != 1 { - return Err(format!("delete expected 1 row, got {deleted}")); - } - Ok(()) - })?; - if !in_tx { - self.refresh_observed_durable_offset(false)?; - } - Ok(TableObservation::RowDeleted { - conn: *conn, - table: *table, - row: row.clone(), - in_tx, - }) - } - TableOperation::ExactDuplicateInsert { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); - let before = self.collect_rows_in_connection(*conn, *table)?; - let inserted_row = self.with_mut_tx(*conn, |engine, tx| { - let table_id = engine.table_id_for_index(*table)?; - let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; - let (_, row_ref, _) = engine - .db()? 
- .insert(tx, table_id, &bsatn) - .map_err(|err| format!("exact duplicate insert failed: {err}"))?; - Ok(SimRow::from_product_value(row_ref.to_product_value())) - })?; - if !in_tx { - self.refresh_observed_durable_offset(false)?; - } - let after = self.collect_rows_in_connection(*conn, *table)?; - if &inserted_row != row { - return Err(format!( - "[ExactDuplicateInsertNoOp] returned row mismatch: expected={row:?}, actual={inserted_row:?}; interaction={interaction:?}" - )); - } - if after != before { - return Err(format!( - "[ExactDuplicateInsertNoOp] changed visible rows: before={before:?}, after={after:?}; interaction={interaction:?}" - )); - } - Ok(TableObservation::Applied) - } - TableOperation::UniqueKeyConflictInsert { conn, table, row } => { - let outcome = self.with_mut_tx(*conn, |engine, tx| { - let table_id = *engine - .base_table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; - match engine.db()?.insert(tx, table_id, &bsatn) { - Ok(_) => Ok(Err("unique-key conflict insert unexpectedly succeeded".to_string())), - Err(err) if is_unique_constraint_violation(&err) => Ok(Ok(())), - Err(err) => Ok(Err(format!( - "unique-key conflict insert returned wrong error: expected={:?}, actual={err}", - ExpectedErrorKind::UniqueConstraintViolation - ))), - } - })?; - match outcome { - Ok(()) => Ok(TableObservation::ExpectedError( - ExpectedErrorKind::UniqueConstraintViolation, - )), - Err(err) => Err(format!("[ExpectedErrorMatches] {err}; interaction={interaction:?}")), - } - } - TableOperation::DeleteMissing { conn, table, row } => { - let deleted = self.with_mut_tx(*conn, |engine, tx| { - let table_id = *engine - .base_table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - Ok(engine.db()?.delete_by_rel(tx, table_id, [row.to_product_value()])) - })?; - if deleted == 0 { - Ok(TableObservation::ExpectedError(ExpectedErrorKind::MissingRow)) - } else { - 
Err(format!( - "[ExpectedErrorDoesNotMutate] missing delete removed {deleted} rows; interaction={interaction:?}" - )) - } - } - TableOperation::BatchInsert { conn, table, rows } => { - let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); - self.with_mut_tx(*conn, |engine, tx| { - let table_id = *engine - .base_table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - for row in rows { - let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; - engine - .db()? - .insert(tx, table_id, &bsatn) - .map_err(|err| format!("batch insert failed: {err}"))?; - } - Ok(()) - })?; - if !in_tx { - self.refresh_observed_durable_offset(false)?; - } - Ok(TableObservation::Applied) - } - TableOperation::BatchDelete { conn, table, rows } => { - let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); - self.with_mut_tx(*conn, |engine, tx| { - let table_id = *engine - .base_table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - for row in rows { - let deleted = engine.db()?.delete_by_rel(tx, table_id, [row.to_product_value()]); - if deleted != 1 { - return Err(format!("batch delete expected 1 row, got {deleted} for row={row:?}")); - } - } - Ok(()) - })?; - if !in_tx { - self.refresh_observed_durable_offset(false)?; - } - Ok(TableObservation::Applied) - } - TableOperation::Reinsert { conn, table, row } => { - let in_tx = self.execution.tx_by_connection[conn.as_index()].is_some(); - self.with_mut_tx(*conn, |engine, tx| { - let table_id = *engine - .base_table_ids - .get(*table) - .ok_or_else(|| format!("table {table} out of range"))?; - let deleted = engine.db()?.delete_by_rel(tx, table_id, [row.to_product_value()]); - if deleted != 1 { - return Err(format!("reinsert delete expected 1 row, got {deleted} for row={row:?}")); - } - let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; - engine - .db()? 
- .insert(tx, table_id, &bsatn) - .map_err(|err| format!("reinsert insert failed: {err}"))?; - Ok(()) - })?; - if !in_tx { - self.refresh_observed_durable_offset(false)?; - } - Ok(TableObservation::Applied) - } + TableOperation::InsertRows { conn, table, rows } => self.execute_insert_rows(*conn, *table, rows), + TableOperation::DeleteRows { conn, table, rows } => self.execute_delete_rows(*conn, *table, rows), TableOperation::AddColumn { conn, table, @@ -831,6 +606,150 @@ impl RelationalDbEngine { } } + fn begin_write_tx(&mut self, conn: SessionId) -> Result { + self.execution.ensure_known_connection(conn)?; + if self.read_tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} already has open read transaction")); + } + if self.execution.tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} already has open transaction")); + } + if self.execution.active_writer.is_some() { + return Ok(TableObservation::ObservedError(TableErrorKind::WriteConflict)); + } + self.execution.tx_by_connection[conn.as_index()] = Some( + self.db()? + .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests), + ); + self.execution.active_writer = Some(conn); + self.stats.transactions.explicit_begin += 1; + Ok(TableObservation::Applied) + } + + fn execute_insert_rows( + &mut self, + conn: SessionId, + table: usize, + rows: &[SimRow], + ) -> Result { + let in_tx = self.is_in_write_tx(conn); + let outcome = self.with_mut_tx_observed(conn, |engine, tx| { + let mut mutations = Vec::with_capacity(rows.len()); + for row in rows { + match engine.try_insert_base_row(tx, table, row)? 
{ + Ok(returned) => mutations.push(TableMutation::Inserted { + table, + requested: row.clone(), + returned, + }), + Err(err) if is_unique_constraint_violation(&err) => { + return Ok(Err(TableErrorKind::UniqueConstraintViolation)); + } + Err(err) => return Err(format!("insert failed: {err}")), + } + } + Ok(Ok(mutations)) + }); + self.mutation_observation(conn, in_tx, outcome) + } + + fn execute_delete_rows( + &mut self, + conn: SessionId, + table: usize, + rows: &[SimRow], + ) -> Result { + let in_tx = self.is_in_write_tx(conn); + let outcome = self.with_mut_tx_observed(conn, |engine, tx| { + let mut mutations = Vec::with_capacity(rows.len()); + for row in rows { + match engine.delete_base_row_count(tx, table, row)? { + 0 => return Ok(Err(TableErrorKind::MissingRow)), + 1 => mutations.push(TableMutation::Deleted { + table, + row: row.clone(), + }), + deleted => { + return Err(format!("delete for row={row:?} affected {deleted} rows")); + } + } + } + Ok(Ok(mutations)) + }); + self.mutation_observation(conn, in_tx, outcome) + } + + fn mutation_observation( + &mut self, + conn: SessionId, + in_tx: bool, + outcome: Result, TableErrorKind>, String>, + ) -> Result { + match outcome { + Ok(Ok(mutations)) => { + self.refresh_if_auto_commit(in_tx)?; + Ok(TableObservation::Mutated { conn, mutations, in_tx }) + } + Ok(Err(kind)) => Ok(TableObservation::ObservedError(kind)), + Err(err) if is_write_conflict_error(&err) => { + Ok(TableObservation::ObservedError(TableErrorKind::WriteConflict)) + } + Err(err) => Err(err), + } + } + + fn with_mut_tx_observed( + &mut self, + conn: SessionId, + mut f: impl FnMut(&mut Self, &mut RelMutTx) -> Result, String>, + ) -> Result, String> { + self.execution.ensure_known_connection(conn)?; + if self.read_tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} cannot write while read transaction is open")); + } + if self.execution.tx_by_connection[conn.as_index()].is_some() { + let mut tx = 
self.execution.tx_by_connection[conn.as_index()] + .take() + .ok_or_else(|| format!("connection {conn} missing transaction handle"))?; + let result = f(self, &mut tx); + self.execution.tx_by_connection[conn.as_index()] = Some(tx); + return result; + } + + if self.execution.active_writer.is_some() { + return Ok(Err(TableErrorKind::WriteConflict)); + } + + let mut tx = self + .db()? + .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + self.execution.active_writer = Some(conn); + let value = match f(self, &mut tx) { + Ok(Ok(value)) => value, + Ok(Err(kind)) => { + let _ = self.db()?.rollback_mut_tx(tx); + self.execution.active_writer = None; + return Ok(Err(kind)); + } + Err(err) => { + let _ = self.db()?.rollback_mut_tx(tx); + self.execution.active_writer = None; + return Err(err); + } + }; + let committed = match self.db()?.commit_tx(tx) { + Ok(committed) => committed, + Err(err) => { + self.execution.active_writer = None; + return Err(format!("auto-commit write failed: {err}")); + } + }; + self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); + self.execution.active_writer = None; + self.stats.transactions.auto_commit += 1; + Ok(Ok(value)) + } + fn with_mut_tx( &mut self, conn: SessionId, @@ -844,9 +763,9 @@ impl RelationalDbEngine { let mut tx = self.execution.tx_by_connection[conn.as_index()] .take() .ok_or_else(|| format!("connection {conn} missing transaction handle"))?; - let value = f(self, &mut tx)?; + let result = f(self, &mut tx); self.execution.tx_by_connection[conn.as_index()] = Some(tx); - return Ok(value); + return result; } if let Some(owner) = self.execution.active_writer { @@ -859,35 +778,65 @@ impl RelationalDbEngine { .db()? .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); self.execution.active_writer = Some(conn); - let value = f(self, &mut tx)?; - let committed = self - .db()? 
- .commit_tx(tx) - .map_err(|err| format!("auto-commit write failed: {err}"))?; + let value = match f(self, &mut tx) { + Ok(value) => value, + Err(err) => { + let _ = self.db()?.rollback_mut_tx(tx); + self.execution.active_writer = None; + return Err(err); + } + }; + let committed = match self.db()?.commit_tx(tx) { + Ok(committed) => committed, + Err(err) => { + self.execution.active_writer = None; + return Err(format!("auto-commit write failed: {err}")); + } + }; self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); self.execution.active_writer = None; self.stats.transactions.auto_commit += 1; Ok(value) } - fn expect_write_conflict(&self, owner: SessionId, conn: SessionId) -> Result<(), String> { - self.execution.ensure_known_connection(owner)?; - self.execution.ensure_known_connection(conn)?; - if owner == conn { - return Err(format!("write conflict owner and contender are both connection {conn}")); - } - if self.execution.active_writer != Some(owner) { - return Err(format!( - "expected connection {owner} to own write lock, actual={:?}", - self.execution.active_writer - )); - } - if self.read_tx_by_connection[conn.as_index()].is_some() { - return Err(format!( - "conflicting connection {conn} unexpectedly has a read transaction" - )); - } - Ok(()) + fn try_insert_base_row( + &self, + tx: &mut RelMutTx, + table: usize, + row: &SimRow, + ) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + self.try_insert_row(tx, table_id, row) + } + + fn try_insert_row( + &self, + tx: &mut RelMutTx, + table_id: TableId, + row: &SimRow, + ) -> Result, String> { + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + Ok(match self.db()?.insert(tx, table_id, &bsatn) { + Ok((_, row_ref, _)) => Ok(SimRow::from_product_value(row_ref.to_product_value())), + Err(err) => Err(err), + }) + } + + fn insert_row( + &self, + tx: &mut RelMutTx, + table_id: TableId, + row: &SimRow, + context: impl Into, + ) -> Result { + let context = 
context.into(); + self.try_insert_row(tx, table_id, row)? + .map_err(|err| format!("{context}: {err}")) + } + + fn delete_base_row_count(&self, tx: &mut RelMutTx, table: usize, row: &SimRow) -> Result { + let table_id = self.table_id_for_index(table)?; + Ok(self.db()?.delete_by_rel(tx, table_id, [row.to_product_value()])) } fn create_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { @@ -914,11 +863,12 @@ impl RelationalDbEngine { let seed_row = SimRow { values: vec![AlgebraicValue::I64(0), AlgebraicValue::U64(slot as u64)], }; - let bsatn = seed_row.to_bsatn().map_err(|err| err.to_string())?; - engine - .db()? - .insert(tx, table_id, &bsatn) - .map_err(|err| format!("seed dynamic table auto-inc insert failed for slot={slot}: {err}"))?; + engine.insert_row( + tx, + table_id, + &seed_row, + format!("seed dynamic table auto-inc insert failed for slot={slot}"), + )?; engine.dynamic_tables.insert( slot, DynamicTableState { @@ -993,12 +943,12 @@ impl RelationalDbEngine { .collect::>(); let probe_row = dynamic_probe_row(slot, to_version); - let bsatn = probe_row.to_bsatn().map_err(|err| err.to_string())?; - let (_, inserted_ref, _) = engine - .db()? 
- .insert(tx, new_table_id, &bsatn) - .map_err(|err| format!("migrate auto-inc probe failed for slot={slot}: {err}"))?; - let inserted = SimRow::from_product_value(inserted_ref.to_product_value()); + let inserted = engine.insert_row( + tx, + new_table_id, + &probe_row, + format!("migrate auto-inc probe failed for slot={slot}"), + )?; engine.dynamic_tables.insert( slot, DynamicTableState { @@ -1055,6 +1005,20 @@ impl RelationalDbEngine { } } + fn is_in_write_tx(&self, conn: SessionId) -> bool { + self.execution + .tx_by_connection + .get(conn.as_index()) + .is_some_and(Option::is_some) + } + + fn refresh_if_auto_commit(&mut self, in_tx: bool) -> Result<(), String> { + if !in_tx { + self.refresh_observed_durable_offset(false)?; + } + Ok(()) + } + fn table_id_for_index(&self, table: usize) -> Result { self.base_table_ids .get(table) @@ -1062,6 +1026,65 @@ impl RelationalDbEngine { .ok_or_else(|| format!("table {table} out of range")) } + fn with_fresh_read_tx(&self, f: impl FnOnce(&RelationalDB, &RelTx) -> Result) -> Result { + let db = self.db()?; + let tx = db.begin_tx(Workload::ForTests); + self.stats.record_read_tx(); + let result = f(db, &tx); + let _ = db.release_tx(tx); + result + } + + fn collect_rows_in_fresh_tx(&self, table_id: TableId, context: &'static str) -> Result, String> { + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter(tx, table_id) + .map_err(|err| format!("{context}: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>()) + }) + } + + fn count_rows_in_fresh_tx(&self, table_id: TableId, context: &'static str) -> Result { + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter(tx, table_id) + .map_err(|err| format!("{context}: {err}"))? 
+ .count()) + }) + } + + fn count_by_col_eq_in_fresh_tx( + &self, + table_id: TableId, + col: u16, + value: &AlgebraicValue, + context: &'static str, + ) -> Result { + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter_by_col_eq(tx, table_id, col, value) + .map_err(|err| format!("{context}: {err}"))? + .count()) + }) + } + + fn range_scan_in_fresh_tx( + &self, + table_id: TableId, + cols: spacetimedb_primitives::ColList, + bounds: (Bound, Bound), + context: &'static str, + ) -> Result, String> { + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter_by_col_range(tx, table_id, cols, bounds) + .map_err(|err| format!("{context}: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>()) + }) + } + fn lookup_base_row(&self, conn: SessionId, table: usize, id: u64) -> Result, String> { let table_id = self.table_id_for_index(table)?; if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { @@ -1079,17 +1102,13 @@ impl RelationalDbEngine { .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .next()) } else { - let db = self.db()?; - let tx = db.begin_tx(Workload::ForTests); - self.stats.record_read_tx(); - let found = self - .db()? - .iter_by_col_eq(&tx, table_id, 0u16, &AlgebraicValue::U64(id)) - .map_err(|err| format!("lookup failed: {err}"))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .next(); - let _ = db.release_tx(tx); - Ok(found) + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter_by_col_eq(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next()) + }) } } @@ -1139,7 +1158,7 @@ impl RelationalDbEngine { .map_err(|err| format!("read-tx predicate query failed: {err}"))? 
.count()) } else { - self.count_by_col_eq_for_property(table, col, value) + self.count_by_col_eq_in_fresh_tx(table_id, col, value, "predicate query failed") } } @@ -1166,17 +1185,7 @@ impl RelationalDbEngine { .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .collect::>() } else { - let db = self.db()?; - let tx = db.begin_tx(Workload::ForTests); - self.stats.record_read_tx(); - let rows = self - .db()? - .iter_by_col_range(&tx, table_id, col_list, (lower, upper)) - .map_err(|err| format!("range scan failed: {err}"))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - let _ = db.release_tx(tx); - rows + self.range_scan_in_fresh_tx(table_id, col_list, (lower, upper), "range scan failed")? }; rows.sort_by(|lhs, rhs| compare_rows_for_range(lhs, rhs, cols)); Ok(rows) @@ -1184,30 +1193,12 @@ impl RelationalDbEngine { fn count_rows_for_property(&self, table: usize) -> Result { let table_id = self.table_id_for_index(table)?; - let db = self.db()?; - let tx = db.begin_tx(Workload::ForTests); - self.stats.record_read_tx(); - let total = self - .db()? - .iter(&tx, table_id) - .map_err(|err| format!("scan failed: {err}"))? - .count(); - let _ = db.release_tx(tx); - Ok(total) + self.count_rows_in_fresh_tx(table_id, "scan failed") } fn count_by_col_eq_for_property(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { let table_id = self.table_id_for_index(table)?; - let db = self.db()?; - let tx = db.begin_tx(Workload::ForTests); - self.stats.record_read_tx(); - let total = self - .db()? - .iter_by_col_eq(&tx, table_id, col, value) - .map_err(|err| format!("predicate query failed: {err}"))? 
- .count(); - let _ = db.release_tx(tx); - Ok(total) + self.count_by_col_eq_in_fresh_tx(table_id, col, value, "predicate query failed") } fn range_scan_for_property( @@ -1218,31 +1209,12 @@ impl RelationalDbEngine { upper: Bound, ) -> Result, String> { let table_id = self.table_id_for_index(table)?; - let db = self.db()?; - let tx = db.begin_tx(Workload::ForTests); - self.stats.record_read_tx(); let cols = cols.iter().copied().collect::(); - let rows = self - .db()? - .iter_by_col_range(&tx, table_id, cols, (lower, upper)) - .map_err(|err| format!("range scan failed: {err}"))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - let _ = db.release_tx(tx); - Ok(rows) + self.range_scan_in_fresh_tx(table_id, cols, (lower, upper), "range scan failed") } fn collect_rows_by_id(&self, table_id: TableId) -> Result, String> { - let db = self.db()?; - let tx = db.begin_tx(Workload::ForTests); - self.stats.record_read_tx(); - let mut rows = self - .db()? - .iter(&tx, table_id) - .map_err(|err| format!("scan failed: {err}"))? 
- .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - let _ = db.release_tx(tx); + let mut rows = self.collect_rows_in_fresh_tx(table_id, "scan failed")?; rows.sort_by_key(|row| row.id().unwrap_or_default()); Ok(rows) } @@ -1354,6 +1326,10 @@ impl TargetPropertyAccess for RelationalDbEngine { Self::lookup_base_row(self, conn, table, id) } + fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String> { + Self::collect_rows_in_connection(self, conn, table) + } + fn collect_rows_for_table(&self, table: usize) -> Result, String> { let table_id = self.table_id_for_index(table)?; Self::collect_rows_by_id(self, table_id) @@ -1527,6 +1503,10 @@ fn is_unique_constraint_violation(err: &DBError) -> bool { ) } +fn is_write_conflict_error(err: &str) -> bool { + err.contains("owns lock") +} + fn compare_rows_for_range(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { lhs.project_key(cols) .to_algebraic_value() diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs index c01d6ff8880..611acc28f94 100644 --- a/crates/dst/src/workload/commitlog_ops/generation.rs +++ b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -3,7 +3,7 @@ use std::collections::{BTreeSet, VecDeque}; use crate::{ - core::NextInteractionSource, + core::WorkloadSource, schema::SchemaPlan, seed::{DstRng, DstSeed}, workload::strategy::{Index, Percent, Strategy}, @@ -16,7 +16,6 @@ use crate::{ /// Generation profile for commitlog-specific interactions layered around table ops. 
#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub(crate) struct CommitlogWorkloadProfile { - pub(crate) chaos_sync_pct: usize, pub(crate) close_reopen_pct: usize, pub(crate) create_dynamic_table_pct: usize, pub(crate) migrate_after_create_pct: usize, @@ -27,7 +26,6 @@ pub(crate) struct CommitlogWorkloadProfile { impl Default for CommitlogWorkloadProfile { fn default() -> Self { Self { - chaos_sync_pct: 18, close_reopen_pct: 1, create_dynamic_table_pct: 1, migrate_after_create_pct: 55, @@ -102,9 +100,6 @@ impl CommitlogWorkloadSource { return true; } - if Percent::new(self.profile.chaos_sync_pct).sample(&mut self.rng) { - self.pending.push_back(CommitlogInteraction::ChaosSync); - } if Percent::new(self.profile.close_reopen_pct).sample(&mut self.rng) { self.pending.push_back(CommitlogInteraction::CloseReopen); } @@ -176,7 +171,7 @@ impl CommitlogWorkloadSource { } } -impl NextInteractionSource for CommitlogWorkloadSource { +impl WorkloadSource for CommitlogWorkloadSource { type Interaction = CommitlogInteraction; fn next_interaction(&mut self) -> Option { diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs index ece7687de04..cdaac71adb8 100644 --- a/crates/dst/src/workload/commitlog_ops/types.rs +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -18,8 +18,6 @@ pub enum CommitlogInteraction { DropDynamicTable { conn: SessionId, slot: u32 }, /// Migrate dynamic table schema for a slot. MigrateDynamicTable { conn: SessionId, slot: u32 }, - /// Ask the mock commitlog file layer to run a sync attempt. - ChaosSync, /// Close and restart the database from durable history. 
CloseReopen, } @@ -63,7 +61,6 @@ pub struct InteractionSummary { pub create_dynamic_table: usize, pub drop_dynamic_table: usize, pub migrate_dynamic_table: usize, - pub chaos_sync: usize, pub close_reopen_requested: usize, pub close_reopen_applied: usize, pub close_reopen_skipped: usize, diff --git a/crates/dst/src/workload/module_ops/generation.rs b/crates/dst/src/workload/module_ops/generation.rs index 119922dca9e..c3f8a2e8e77 100644 --- a/crates/dst/src/workload/module_ops/generation.rs +++ b/crates/dst/src/workload/module_ops/generation.rs @@ -1,6 +1,6 @@ use crate::{ client::SessionId, - core::NextInteractionSource, + core::WorkloadSource, schema::generate_value_for_type, seed::{DstRng, DstSeed}, workload::strategy::{Index, Strategy, Weighted}, @@ -111,7 +111,7 @@ fn supports_generation(ty: &spacetimedb_sats::AlgebraicType) -> bool { ) } -impl NextInteractionSource for ModuleWorkloadSource { +impl WorkloadSource for ModuleWorkloadSource { type Interaction = ModuleInteraction; fn next_interaction(&mut self) -> Option { diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index 792fdf2ba13..4c3da2e236b 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -2,7 +2,7 @@ use std::collections::VecDeque; use crate::{ client::SessionId, - core::NextInteractionSource, + core::WorkloadSource, schema::{ColumnPlan, SchemaPlan, TablePlan}, seed::{DstRng, DstSeed}, workload::strategy::{Index, Percent, Strategy}, @@ -25,7 +25,7 @@ pub struct TableWorkloadSource { rng: DstRng, // Scenario-specific workload policy layered on top of the shared model. scenario: S, - // Generator-side expected state used to decide what interactions are legal. + // Generator-side model used to decide what interactions are legal. model: GenerationModel, num_connections: usize, // Soft budget for scenario-generated interactions. 
Finish mode may emit a @@ -171,11 +171,6 @@ impl<'a> ScenarioPlanner<'a> { self.model.batch_delete(conn, table, rows); } - pub fn reinsert(&mut self, conn: SessionId, table: usize, row: crate::schema::SimRow) { - self.model.delete(conn, table, row.clone()); - self.model.insert(conn, table, row); - } - pub fn add_column(&mut self, table: usize, column: ColumnPlan, default: spacetimedb_sats::AlgebraicValue) { self.model.add_column(table, column, default); } @@ -300,7 +295,7 @@ impl TableWorkloadSource { } } -impl NextInteractionSource for TableWorkloadSource { +impl WorkloadSource for TableWorkloadSource { type Interaction = TableWorkloadInteraction; fn next_interaction(&mut self) -> Option { diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs index 18dbd2e3a14..facf8a92734 100644 --- a/crates/dst/src/workload/table_ops/mod.rs +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -7,7 +7,7 @@ pub(crate) mod strategies; mod types; pub(crate) use generation::TableWorkloadSource; -pub(crate) use model::ExpectedModel; +pub(crate) use model::{PredictedOutcome, TableOracle}; pub use scenarios::TableScenarioId; pub(crate) use types::{ConnectionWriteState, TableScenario}; -pub use types::{ExpectedErrorKind, ExpectedResult, TableOperation, TableWorkloadInteraction, TableWorkloadOutcome}; +pub use types::{TableErrorKind, TableInteractionCase, TableOperation, TableWorkloadInteraction, TableWorkloadOutcome}; diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs index ab78106f019..cf9412b1c4c 100644 --- a/crates/dst/src/workload/table_ops/model.rs +++ b/crates/dst/src/workload/table_ops/model.rs @@ -8,7 +8,7 @@ use crate::{ seed::{DstRng, DstSeed}, }; -use super::{ExpectedResult, TableOperation, TableWorkloadInteraction}; +use super::{TableErrorKind, TableOperation}; /// Generator-side model of committed rows plus per-connection pending writes. 
/// @@ -227,18 +227,30 @@ impl GenerationModel { } } -/// Replay model for the expected final committed state of a table workload. +/// Replay model used as the oracle for table workload properties. /// /// Target property runtimes apply every table interaction here in parallel with /// real target execution, then compare the collected target outcome against this /// model at the end of the run. #[derive(Clone, Debug)] -pub struct ExpectedModel { +pub struct TableOracle { committed: Vec>, connections: Vec, active_writer: Option, } +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum PredictedOutcome { + Applied, + NoMutation { + subject: Option<(SessionId, usize)>, + }, + Error { + kind: TableErrorKind, + subject: Option<(SessionId, usize)>, + }, +} + #[derive(Clone, Debug, Default)] struct ExpectedConnection { in_tx: bool, @@ -247,7 +259,7 @@ struct ExpectedConnection { staged_deletes: Vec<(usize, SimRow)>, } -impl ExpectedModel { +impl TableOracle { pub fn new(table_count: usize, connection_count: usize) -> Self { Self { committed: vec![Vec::new(); table_count], @@ -256,15 +268,62 @@ impl ExpectedModel { } } - pub fn apply(&mut self, interaction: &TableWorkloadInteraction) { - if !matches!(interaction.expected, ExpectedResult::Ok) { - return; + pub fn predict(&self, op: &TableOperation) -> Result { + match op { + TableOperation::BeginTx { conn } => { + self.ensure_connection(*conn)?; + if self.connections[conn.as_index()].read_snapshot.is_some() { + return Err(format!("connection {conn} cannot begin write tx with open read tx")); + } + if self.connections[conn.as_index()].in_tx { + return Err(format!("connection {conn} already has open write tx")); + } + if self.active_writer.is_some() { + return Ok(PredictedOutcome::Error { + kind: TableErrorKind::WriteConflict, + subject: None, + }); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::BeginReadTx { conn } => { + self.ensure_connection(*conn)?; + let state = &self.connections[conn.as_index()]; + if 
state.in_tx || state.read_snapshot.is_some() { + return Err(format!("connection {conn} cannot begin read tx in current state")); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::ReleaseReadTx { conn } => { + self.ensure_connection(*conn)?; + if self.connections[conn.as_index()].read_snapshot.is_none() { + return Err(format!("connection {conn} has no read tx to release")); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::CommitTx { conn } | TableOperation::RollbackTx { conn } => { + self.ensure_connection(*conn)?; + if self.active_writer != Some(*conn) || !self.connections[conn.as_index()].in_tx { + return Err(format!("connection {conn} does not own an open write tx")); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::InsertRows { conn, table, rows } => self.predict_insert_rows(*conn, *table, rows), + TableOperation::DeleteRows { conn, table, rows } => self.predict_delete_rows(*conn, *table, rows), + TableOperation::AddColumn { .. } | TableOperation::AddIndex { .. } => Ok(PredictedOutcome::Applied), + TableOperation::PointLookup { .. } + | TableOperation::PredicateCount { .. } + | TableOperation::RangeScan { .. } + | TableOperation::FullScan { .. 
} => Ok(PredictedOutcome::NoMutation { subject: None }), } - match &interaction.op { + } + + pub fn apply(&mut self, op: &TableOperation) { + match op { TableOperation::BeginTx { conn } => { assert!( self.active_writer.is_none(), - "multiple concurrent writers in expected model" + "multiple concurrent writers in table oracle" ); self.connections[conn.as_index()].in_tx = true; self.active_writer = Some(*conn); @@ -272,7 +331,7 @@ impl ExpectedModel { TableOperation::BeginReadTx { conn } => { let state = &mut self.connections[conn.as_index()]; assert!(!state.in_tx, "read tx started while write tx is open"); - assert!(state.read_snapshot.is_none(), "nested read tx in expected model"); + assert!(state.read_snapshot.is_none(), "nested read tx in table oracle"); state.read_snapshot = Some(self.committed.clone()); } TableOperation::ReleaseReadTx { conn } => { @@ -282,7 +341,7 @@ impl ExpectedModel { ); } TableOperation::CommitTx { conn } => { - assert_eq!(self.active_writer, Some(*conn), "commit by non-owner in expected model"); + assert_eq!(self.active_writer, Some(*conn), "commit by non-owner in table oracle"); let state = &mut self.connections[conn.as_index()]; for (table, row) in state.staged_deletes.drain(..) 
{ self.committed[table].retain(|candidate| *candidate != row); @@ -294,37 +353,15 @@ impl ExpectedModel { self.active_writer = None; } TableOperation::RollbackTx { conn } => { - assert_eq!( - self.active_writer, - Some(*conn), - "rollback by non-owner in expected model" - ); + assert_eq!(self.active_writer, Some(*conn), "rollback by non-owner in table oracle"); let state = &mut self.connections[conn.as_index()]; state.staged_inserts.clear(); state.staged_deletes.clear(); state.in_tx = false; self.active_writer = None; } - TableOperation::Insert { conn, table, row } => { - self.insert(*conn, *table, row.clone()); - } - TableOperation::Delete { conn, table, row } => { - self.delete(*conn, *table, row.clone()); - } - TableOperation::BatchInsert { conn, table, rows } => { - for row in rows { - self.insert(*conn, *table, row.clone()); - } - } - TableOperation::BatchDelete { conn, table, rows } => { - for row in rows { - self.delete(*conn, *table, row.clone()); - } - } - TableOperation::Reinsert { conn, table, row } => { - self.delete(*conn, *table, row.clone()); - self.insert(*conn, *table, row.clone()); - } + TableOperation::InsertRows { conn, table, rows } => self.insert_rows(*conn, *table, rows), + TableOperation::DeleteRows { conn, table, rows } => self.delete_rows(*conn, *table, rows), TableOperation::AddColumn { table, column: _, @@ -334,18 +371,98 @@ impl ExpectedModel { self.add_column(*table, default.clone()); } TableOperation::AddIndex { .. } => {} - TableOperation::ExactDuplicateInsert { .. } - | TableOperation::UniqueKeyConflictInsert { .. } - | TableOperation::DeleteMissing { .. } - | TableOperation::BeginTxConflict { .. } - | TableOperation::WriteConflictInsert { .. } - | TableOperation::PointLookup { .. } + TableOperation::PointLookup { .. } | TableOperation::PredicateCount { .. } | TableOperation::RangeScan { .. } | TableOperation::FullScan { .. 
} => {} } } + fn predict_insert_rows(&self, conn: SessionId, table: usize, rows: &[SimRow]) -> Result { + if let Some(outcome) = self.predict_write_access(conn, table)? { + return Ok(outcome); + } + + let mut visible = self.visible_rows(conn, table); + let mut mutates = false; + for row in rows { + let Some(id) = row.id() else { + return Err(format!("insert row for table {table} is missing primary id: {row:?}")); + }; + match visible.iter().find(|candidate| candidate.id() == Some(id)) { + Some(existing) if existing == row => {} + Some(_) => { + return Ok(PredictedOutcome::Error { + kind: TableErrorKind::UniqueConstraintViolation, + subject: Some((conn, table)), + }); + } + None => { + mutates = true; + visible.push(row.clone()); + } + } + } + + if mutates { + Ok(PredictedOutcome::Applied) + } else { + Ok(PredictedOutcome::NoMutation { + subject: Some((conn, table)), + }) + } + } + + fn predict_delete_rows(&self, conn: SessionId, table: usize, rows: &[SimRow]) -> Result { + if let Some(outcome) = self.predict_write_access(conn, table)? 
{ + return Ok(outcome); + } + + let mut visible = self.visible_rows(conn, table); + for row in rows { + let Some(idx) = visible.iter().position(|candidate| candidate == row) else { + return Ok(PredictedOutcome::Error { + kind: TableErrorKind::MissingRow, + subject: Some((conn, table)), + }); + }; + visible.remove(idx); + } + + Ok(PredictedOutcome::Applied) + } + + fn predict_write_access(&self, conn: SessionId, table: usize) -> Result, String> { + self.ensure_connection(conn)?; + self.ensure_table(table)?; + if self.connections[conn.as_index()].read_snapshot.is_some() { + return Err(format!("connection {conn} cannot write while read tx is open")); + } + if let Some(owner) = self.active_writer + && owner != conn + { + return Ok(Some(PredictedOutcome::Error { + kind: TableErrorKind::WriteConflict, + subject: Some((conn, table)), + })); + } + Ok(None) + } + + fn ensure_connection(&self, conn: SessionId) -> Result<(), String> { + self.connections + .get(conn.as_index()) + .map(|_| ()) + .ok_or_else(|| format!("connection {conn} out of range")) + } + + fn ensure_table(&self, table: usize) -> Result<(), String> { + self.committed + .get(table) + .map(|_| ()) + .ok_or_else(|| format!("table {table} out of range")) + } + pub fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { let conn_idx = conn.as_index(); if let Some(snapshot) = &self.connections[conn_idx].read_snapshot { @@ -420,6 +537,19 @@ impl ExpectedModel { } } + fn insert_rows(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { + for row in rows { + if self + .visible_rows(conn, table) + .into_iter() + .any(|candidate| candidate == *row) + { + continue; + } + self.insert(conn, table, row.clone()); + } + } + fn delete(&mut self, conn: SessionId, table: usize, row: SimRow) { let state = &mut self.connections[conn.as_index()]; if state.in_tx { @@ -432,6 +562,12 @@ impl ExpectedModel { } } + fn delete_rows(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { + for row in rows { + 
self.delete(conn, table, row.clone()); + } + } + fn add_column(&mut self, table: usize, default: AlgebraicValue) { for row in &mut self.committed[table] { row.values.push(default.clone()); diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs index d728c7f1ad9..0cb699dbc24 100644 --- a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -9,7 +9,7 @@ use crate::{ workload::strategy::{Index, Percent, Strategy}, }; -use super::super::{generation::ScenarioPlanner, TableWorkloadInteraction, TableWorkloadOutcome}; +use super::super::{generation::ScenarioPlanner, TableInteractionCase, TableWorkloadInteraction, TableWorkloadOutcome}; #[derive(Clone, Copy)] struct TableWorkloadProfile { @@ -287,8 +287,15 @@ fn fill_pending_with_profile(planner: &mut ScenarioPlanner<'_>, conn: SessionId, } if planner.roll_percent(6) { let row = visible_rows[planner.choose_index(visible_rows.len())].clone(); - planner.reinsert(conn, table, row.clone()); - planner.push_interaction(TableWorkloadInteraction::reinsert(conn, table, row)); + planner.delete(conn, table, row.clone()); + planner.push_interaction(TableWorkloadInteraction::delete_with_case( + conn, + table, + row.clone(), + TableInteractionCase::Reinsert, + )); + planner.insert(conn, table, row.clone()); + planner.push_interaction(TableWorkloadInteraction::insert(conn, table, row)); return; } diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index ac5b35f1fa6..9302daec70a 100644 --- a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -24,7 +24,12 @@ pub(crate) trait TableScenario: Clone { #[derive(Clone, Debug, Eq, PartialEq)] pub struct PlannedInteraction { pub op: TableOperation, - pub expected: ExpectedResult, + /// Generator-side coverage/debug label. 
+ /// + /// Correctness must not depend on this field. Properties predict expected + /// behavior from the model and `op`; this label only preserves intent in + /// summaries and failure reports. + pub case: TableInteractionCase, } pub type TableWorkloadInteraction = PlannedInteraction; @@ -41,44 +46,18 @@ pub enum TableOperation { BeginReadTx { conn: SessionId }, /// Release a previously opened read snapshot. ReleaseReadTx { conn: SessionId }, - /// Attempt to start a second writer while another connection owns the write lock. - BeginTxConflict { owner: SessionId, conn: SessionId }, - /// Attempt an auto-commit write while another connection owns the write lock. - WriteConflictInsert { - owner: SessionId, - conn: SessionId, - table: usize, - row: SimRow, - }, - /// Insert a new row with a fresh primary id. - Insert { conn: SessionId, table: usize, row: SimRow }, - /// Delete an existing visible row. - Delete { conn: SessionId, table: usize, row: SimRow }, - /// Reinsert an exact row that is already visible. - /// - /// RelationalDB has set semantics for identical rows, so this should be an - /// idempotent no-op rather than a unique-key error. - ExactDuplicateInsert { conn: SessionId, table: usize, row: SimRow }, - /// Insert a row with an existing primary id but different non-key payload. - /// - /// This is the operation that should fail with `UniqueConstraintViolation`. - UniqueKeyConflictInsert { conn: SessionId, table: usize, row: SimRow }, - /// Delete a row that is absent from the visible state. - DeleteMissing { conn: SessionId, table: usize, row: SimRow }, - /// Insert several fresh rows in one interaction. - BatchInsert { + /// Insert one or more rows. + InsertRows { conn: SessionId, table: usize, rows: Vec, }, - /// Delete several visible rows in one interaction. - BatchDelete { + /// Delete one or more rows. + DeleteRows { conn: SessionId, table: usize, rows: Vec, }, - /// Delete and insert the same row, stressing delete/insert ordering. 
- Reinsert { conn: SessionId, table: usize, row: SimRow }, /// Add a column to an existing table with a default for live rows. AddColumn { conn: SessionId, @@ -114,134 +93,172 @@ pub enum TableOperation { } #[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum ExpectedResult { - Ok, - Err(ExpectedErrorKind), -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum ExpectedErrorKind { +pub enum TableErrorKind { UniqueConstraintViolation, MissingRow, WriteConflict, } -impl PlannedInteraction { - pub fn ok(op: TableOperation) -> Self { - Self { - op, - expected: ExpectedResult::Ok, - } - } +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum TableInteractionCase { + BeginTx, + CommitTx, + RollbackTx, + BeginReadTx, + ReleaseReadTx, + BeginTxConflict, + WriteConflictInsert, + Insert, + Delete, + ExactDuplicateInsert, + UniqueKeyConflictInsert, + DeleteMissing, + BatchInsert, + BatchDelete, + Reinsert, + AddColumn, + AddIndex, + PointLookup, + PredicateCount, + RangeScan, + FullScan, +} - pub fn expected_err(op: TableOperation, kind: ExpectedErrorKind) -> Self { - Self { - op, - expected: ExpectedResult::Err(kind), - } +impl PlannedInteraction { + pub fn new(op: TableOperation, case: TableInteractionCase) -> Self { + Self { op, case } } pub fn begin_tx(conn: SessionId) -> Self { - Self::ok(TableOperation::BeginTx { conn }) + Self::new(TableOperation::BeginTx { conn }, TableInteractionCase::BeginTx) } pub fn commit_tx(conn: SessionId) -> Self { - Self::ok(TableOperation::CommitTx { conn }) + Self::new(TableOperation::CommitTx { conn }, TableInteractionCase::CommitTx) } pub fn rollback_tx(conn: SessionId) -> Self { - Self::ok(TableOperation::RollbackTx { conn }) + Self::new(TableOperation::RollbackTx { conn }, TableInteractionCase::RollbackTx) } pub fn begin_read_tx(conn: SessionId) -> Self { - Self::ok(TableOperation::BeginReadTx { conn }) + Self::new(TableOperation::BeginReadTx { conn }, TableInteractionCase::BeginReadTx) } pub fn release_read_tx(conn: 
SessionId) -> Self { - Self::ok(TableOperation::ReleaseReadTx { conn }) + Self::new( + TableOperation::ReleaseReadTx { conn }, + TableInteractionCase::ReleaseReadTx, + ) } - pub fn begin_tx_conflict(owner: SessionId, conn: SessionId) -> Self { - Self::expected_err( - TableOperation::BeginTxConflict { owner, conn }, - ExpectedErrorKind::WriteConflict, - ) + pub fn begin_tx_conflict(_owner: SessionId, conn: SessionId) -> Self { + Self::new(TableOperation::BeginTx { conn }, TableInteractionCase::BeginTxConflict) } - pub fn write_conflict_insert(owner: SessionId, conn: SessionId, table: usize, row: SimRow) -> Self { - Self::expected_err( - TableOperation::WriteConflictInsert { - owner, + pub fn write_conflict_insert(_owner: SessionId, conn: SessionId, table: usize, row: SimRow) -> Self { + Self::new( + TableOperation::InsertRows { conn, table, - row, + rows: vec![row], }, - ExpectedErrorKind::WriteConflict, + TableInteractionCase::WriteConflictInsert, ) } pub fn insert(conn: SessionId, table: usize, row: SimRow) -> Self { - Self::ok(TableOperation::Insert { conn, table, row }) + Self::insert_with_case(conn, table, row, TableInteractionCase::Insert) + } + + pub fn insert_with_case(conn: SessionId, table: usize, row: SimRow, case: TableInteractionCase) -> Self { + Self::new( + TableOperation::InsertRows { + conn, + table, + rows: vec![row], + }, + case, + ) } pub fn delete(conn: SessionId, table: usize, row: SimRow) -> Self { - Self::ok(TableOperation::Delete { conn, table, row }) + Self::delete_with_case(conn, table, row, TableInteractionCase::Delete) + } + + pub fn delete_with_case(conn: SessionId, table: usize, row: SimRow, case: TableInteractionCase) -> Self { + Self::new( + TableOperation::DeleteRows { + conn, + table, + rows: vec![row], + }, + case, + ) } pub fn exact_duplicate_insert(conn: SessionId, table: usize, row: SimRow) -> Self { - Self::ok(TableOperation::ExactDuplicateInsert { conn, table, row }) + Self::insert_with_case(conn, table, row, 
TableInteractionCase::ExactDuplicateInsert) } pub fn unique_key_conflict_insert(conn: SessionId, table: usize, row: SimRow) -> Self { - Self::expected_err( - TableOperation::UniqueKeyConflictInsert { conn, table, row }, - ExpectedErrorKind::UniqueConstraintViolation, - ) + Self::insert_with_case(conn, table, row, TableInteractionCase::UniqueKeyConflictInsert) } pub fn delete_missing(conn: SessionId, table: usize, row: SimRow) -> Self { - Self::expected_err( - TableOperation::DeleteMissing { conn, table, row }, - ExpectedErrorKind::MissingRow, - ) + Self::delete_with_case(conn, table, row, TableInteractionCase::DeleteMissing) } pub fn batch_insert(conn: SessionId, table: usize, rows: Vec) -> Self { - Self::ok(TableOperation::BatchInsert { conn, table, rows }) + Self::new( + TableOperation::InsertRows { conn, table, rows }, + TableInteractionCase::BatchInsert, + ) } pub fn batch_delete(conn: SessionId, table: usize, rows: Vec) -> Self { - Self::ok(TableOperation::BatchDelete { conn, table, rows }) - } - - pub fn reinsert(conn: SessionId, table: usize, row: SimRow) -> Self { - Self::ok(TableOperation::Reinsert { conn, table, row }) + Self::new( + TableOperation::DeleteRows { conn, table, rows }, + TableInteractionCase::BatchDelete, + ) } pub fn add_column(conn: SessionId, table: usize, column: ColumnPlan, default: AlgebraicValue) -> Self { - Self::ok(TableOperation::AddColumn { - conn, - table, - column, - default, - }) + Self::new( + TableOperation::AddColumn { + conn, + table, + column, + default, + }, + TableInteractionCase::AddColumn, + ) } pub fn add_index(conn: SessionId, table: usize, cols: Vec) -> Self { - Self::ok(TableOperation::AddIndex { conn, table, cols }) + Self::new( + TableOperation::AddIndex { conn, table, cols }, + TableInteractionCase::AddIndex, + ) } pub fn point_lookup(conn: SessionId, table: usize, id: u64) -> Self { - Self::ok(TableOperation::PointLookup { conn, table, id }) + Self::new( + TableOperation::PointLookup { conn, table, id }, + 
TableInteractionCase::PointLookup, + ) } pub fn predicate_count(conn: SessionId, table: usize, col: u16, value: AlgebraicValue) -> Self { - Self::ok(TableOperation::PredicateCount { - conn, - table, - col, - value, - }) + Self::new( + TableOperation::PredicateCount { + conn, + table, + col, + value, + }, + TableInteractionCase::PredicateCount, + ) } pub fn range_scan( @@ -251,17 +268,20 @@ impl PlannedInteraction { lower: Bound, upper: Bound, ) -> Self { - Self::ok(TableOperation::RangeScan { - conn, - table, - cols, - lower, - upper, - }) + Self::new( + TableOperation::RangeScan { + conn, + table, + cols, + lower, + upper, + }, + TableInteractionCase::RangeScan, + ) } pub fn full_scan(conn: SessionId, table: usize) -> Self { - Self::ok(TableOperation::FullScan { conn, table }) + Self::new(TableOperation::FullScan { conn, table }, TableInteractionCase::FullScan) } } From 7282b9b1d7f4f532a75fb2a6efbbe6d22d3dd1ab Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 5 May 2026 13:01:28 +0530 Subject: [PATCH 27/74] slim down to datastore focused --- Cargo.lock | 16 +- crates/core/Cargo.toml | 2 +- crates/core/src/lib.rs | 1 + crates/core/src/runtime.rs | 13 + crates/dst/Cargo.toml | 10 +- crates/dst/README.md | 34 +- crates/dst/build.rs | 10 - crates/dst/src/core/mod.rs | 39 +- crates/dst/src/lib.rs | 11 +- crates/dst/src/main.rs | 46 +- crates/dst/src/sim/executor.rs | 414 ++++++++++++++++++ crates/dst/src/sim/mod.rs | 33 ++ crates/dst/src/sim/rng.rs | 74 ++++ crates/dst/src/targets/buggified_repo.rs | 25 +- crates/dst/src/targets/descriptor.rs | 37 +- crates/dst/src/targets/mod.rs | 1 - .../src/targets/relational_db_commitlog.rs | 41 +- crates/dst/src/targets/standalone_host.rs | 381 ---------------- .../dst/src/workload/commitlog_ops/types.rs | 2 +- crates/dst/src/workload/mod.rs | 1 - .../dst/src/workload/module_ops/generation.rs | 128 ------ crates/dst/src/workload/module_ops/mod.rs | 7 - crates/dst/src/workload/module_ops/types.rs | 43 -- 
crates/dst/tests/madsim_tcp.rs | 39 -- crates/durability/Cargo.toml | 4 +- crates/io/Cargo.toml | 2 +- crates/io/src/lib.rs | 5 +- 27 files changed, 608 insertions(+), 811 deletions(-) create mode 100644 crates/core/src/runtime.rs delete mode 100644 crates/dst/build.rs create mode 100644 crates/dst/src/sim/executor.rs create mode 100644 crates/dst/src/sim/mod.rs create mode 100644 crates/dst/src/sim/rng.rs delete mode 100644 crates/dst/src/targets/standalone_host.rs delete mode 100644 crates/dst/src/workload/module_ops/generation.rs delete mode 100644 crates/dst/src/workload/module_ops/mod.rs delete mode 100644 crates/dst/src/workload/module_ops/types.rs delete mode 100644 crates/dst/tests/madsim_tcp.rs diff --git a/Cargo.lock b/Cargo.lock index 5cf1422dccb..c4c53445e5d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8171,7 +8171,6 @@ dependencies = [ "itertools 0.12.1", "lazy_static", "log", - "madsim-tokio", "memchr", "nix 0.30.1", "nohash-hasher", @@ -8235,6 +8234,7 @@ dependencies = [ "thiserror 1.0.69", "tikv-jemalloc-ctl", "tikv-jemallocator", + "tokio", "tokio-metrics", "tokio-stream", "tokio-util", @@ -8310,25 +8310,17 @@ name = "spacetimedb-dst" version = "2.2.0" dependencies = [ "anyhow", - "bytes", + "async-task", "clap 4.5.50", "futures-util", - "madsim", - "madsim-tokio", - "spacetimedb-cli", - "spacetimedb-client-api", - "spacetimedb-client-api-messages", "spacetimedb-commitlog", "spacetimedb-core", "spacetimedb-datastore", "spacetimedb-durability", - "spacetimedb-execution", "spacetimedb-lib 2.2.0", - "spacetimedb-paths", "spacetimedb-primitives 2.2.0", "spacetimedb-sats 2.2.0", "spacetimedb-schema", - "spacetimedb-standalone", "spacetimedb-table", "tracing", "tracing-subscriber", @@ -8343,7 +8335,6 @@ dependencies = [ "futures", "itertools 0.12.1", "log", - "madsim-tokio", "scopeguard", "spacetimedb-commitlog", "spacetimedb-fs-utils", @@ -8351,6 +8342,7 @@ dependencies = [ "spacetimedb-sats 2.2.0", "tempfile", "thiserror 1.0.69", + "tokio", "tracing", 
] @@ -8418,7 +8410,7 @@ dependencies = [ name = "spacetimedb-io" version = "2.2.0" dependencies = [ - "madsim-tokio", + "tokio", ] [[package]] diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 443c355e79e..ed8c2ac9e9d 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -106,7 +106,7 @@ tempfile.workspace = true thiserror.workspace = true thin-vec.workspace = true tokio-util.workspace = true -tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } +tokio.workspace = true tokio-stream = { workspace = true, features = ["sync"] } tokio-metrics = { version = "0.4.0", features = ["rt"] } toml.workspace = true diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 26b35230b1f..4a7246bcbd7 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -18,6 +18,7 @@ pub mod estimation; pub mod host; pub mod module_host_context; pub mod replica_context; +pub mod runtime; pub mod startup; pub mod subscription; pub mod util; diff --git a/crates/core/src/runtime.rs b/crates/core/src/runtime.rs new file mode 100644 index 00000000000..0d16e2d3566 --- /dev/null +++ b/crates/core/src/runtime.rs @@ -0,0 +1,13 @@ +//! Opaque runtime boundary for crates that should not depend on Tokio directly. 
+ +pub type Handle = tokio::runtime::Handle; +pub type Runtime = tokio::runtime::Runtime; + +pub fn current_handle_or_new_runtime() -> anyhow::Result<(Handle, Option)> { + if let Ok(handle) = Handle::try_current() { + return Ok((handle, None)); + } + + let runtime = Runtime::new()?; + Ok((runtime.handle().clone(), Some(runtime))) +} diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index 21ae398c299..1518cda9195 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -16,25 +16,17 @@ bench = false [dependencies] anyhow.workspace = true +async-task = "4.4" clap.workspace = true futures-util.workspace = true -tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } -bytes.workspace = true -spacetimedb-cli.workspace = true -spacetimedb-client-api.workspace = true -spacetimedb-client-api-messages.workspace = true spacetimedb-datastore = { workspace = true, features = ["test"] } spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.2.0" } spacetimedb-commitlog = { workspace = true, features = ["test"] } spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.2.0" } -spacetimedb-execution.workspace = true spacetimedb-lib.workspace = true -spacetimedb-paths.workspace = true spacetimedb-primitives.workspace = true spacetimedb-sats.workspace = true spacetimedb-schema = { workspace = true, features = ["test"] } -spacetimedb-standalone.workspace = true spacetimedb-table.workspace = true tracing.workspace = true tracing-subscriber.workspace = true -madsim = { path = "../../../../madsim/madsim" } diff --git a/crates/dst/README.md b/crates/dst/README.md index d22236d0dc4..e9c756a5646 100644 --- a/crates/dst/README.md +++ b/crates/dst/README.md @@ -53,10 +53,6 @@ replication traffic. Targets translate those IDs into their own handles: - `relational-db-commitlog` maps `SessionId` to direct write/read transaction slots. 
-- `standalone-host` currently maps `SessionId::ZERO` to its host - `ClientConnection`; reducer interactions already carry the logical session so - multi-session host workloads can be added without changing the interaction - shape again. - future replication targets can map `SessionId` plus endpoint/node IDs to a client connection routed through the simulated network. @@ -75,8 +71,7 @@ DST workloads use three building blocks: `table_ops` is the base table-transaction workload. `commitlog_ops` composes it and injects durability lifecycle operations such as sync, close/reopen, dynamic -table create/migrate/drop, and replay checks. `module_ops` drives standalone -host/module interactions. +table create/migrate/drop, and replay checks. Use this rule of thumb: @@ -116,10 +111,6 @@ or properties to trust generator-provided expectations. - `relational-db-commitlog`: runs table and commitlog lifecycle interactions against `RelationalDB`, local durability, dynamic schema operations, close/reopen, and replay-from-history checks. -- `standalone-host`: runs generated module interactions against a standalone - host environment. - -Both targets reuse shared workload families and the same streaming runner. ## Properties @@ -144,7 +135,7 @@ Current property families include: ## Fault Injection `relational-db-commitlog` can wrap the in-memory commitlog repo in -`BuggifiedRepo`. Fault decisions are deterministic in simulation runs and +`BuggifiedRepo`. Fault decisions are deterministic from the run seed and summarized in the final outcome. 
Profiles: @@ -169,25 +160,18 @@ Scenario examples: ```bash cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario banking --duration 5m cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario indexed-ranges --duration 5m -cargo run -p spacetimedb-dst -- run --target standalone-host --scenario host-smoke --max-interactions 100 ``` -madsim-backed simulation run with commitlog faults: +Run with commitlog faults: ```bash -RUSTFLAGS='--cfg madsim' cargo run -p spacetimedb-dst -- run \ +cargo run -p spacetimedb-dst -- run \ --target relational-db-commitlog \ --seed 42 \ --max-interactions 400 \ --commitlog-fault-profile default ``` -`--cfg madsim` is still the switch that enables madsim-tokio. Do not pass -`--cfg simulation` directly: that only enables SpacetimeDB's cfg gates and leaves -the madsim dependency in its normal Tokio/std mode. The workspace crates derive -`cfg(simulation)` from `cfg(madsim)` so SpacetimeDB source code does not need -provider-specific cfg gates. - Trace every interaction: ```bash @@ -212,6 +196,7 @@ Start here: - `src/workload/table_ops`: table interaction language, generation model, and scenarios. - `src/workload/commitlog_ops`: lifecycle layer over table workloads. +- `src/sim/`: local executor and deterministic-decision shim. - `src/properties.rs`: property catalog and oracle/model checks. - `src/targets/relational_db_commitlog.rs`: target adapter for RelationalDB, commitlog durability, fault injection, close/reopen, and replay. @@ -232,12 +217,11 @@ Start here: - No shrinker yet; seed replay is the current reproduction mechanism. - Sometimes-property reporting is still outcome-counter based, not a stable property-event catalog. -- madsim backs the current deterministic runtime/fault hooks; deeper - host/network/filesystem simulation still needs explicit runtime and IO - boundaries. +- The local `sim` shim is not a real simulator yet. 
It owns executor setup and + deterministic fault decisions so future simulator work has one boundary. - The current `RelationalDB` target drives open read snapshots to release before starting writes, because beginning a write behind an open read snapshot can block in this target shape. Interleaved read/write snapshot histories should come back once the target models that lock behavior explicitly. -- Current simulation builds still expose runtime-boundary gaps, including - `spawn_blocking` call sites and randomized standard `HashMap` state warnings. +- Runtime-boundary work for scheduler, time, network, filesystem, and lower + randomness sources is still future work. diff --git a/crates/dst/build.rs b/crates/dst/build.rs deleted file mode 100644 index 3982c077afc..00000000000 --- a/crates/dst/build.rs +++ /dev/null @@ -1,10 +0,0 @@ -fn main() { - println!("cargo:rerun-if-env-changed=CARGO_CFG_MADSIM"); - println!("cargo:rerun-if-env-changed=CARGO_CFG_SIMULATION"); - println!("cargo:rerun-if-env-changed=CARGO_ENCODED_RUSTFLAGS"); - println!("cargo:rerun-if-env-changed=RUSTFLAGS"); - - if std::env::var_os("CARGO_CFG_MADSIM").is_some() { - println!("cargo:rustc-cfg=simulation"); - } -} diff --git a/crates/dst/src/core/mod.rs b/crates/dst/src/core/mod.rs index 2a781a714c0..3920471971c 100644 --- a/crates/dst/src/core/mod.rs +++ b/crates/dst/src/core/mod.rs @@ -233,31 +233,32 @@ mod tests { } } - #[tokio::test] - async fn not_crash_catches_execute_panic() { - assert_not_crash_error(PanicPhase::Execute, "execute_interaction", "execute panic").await; + #[test] + fn not_crash_catches_execute_panic() { + assert_not_crash_error(PanicPhase::Execute, "execute_interaction", "execute panic"); } - #[tokio::test] - async fn not_crash_catches_finish_panic() { - assert_not_crash_error(PanicPhase::Finish, "finish", "finish panic").await; + #[test] + fn not_crash_catches_finish_panic() { + assert_not_crash_error(PanicPhase::Finish, "finish", "finish panic"); } - #[tokio::test] - async 
fn not_crash_catches_collect_outcome_panic() { - assert_not_crash_error(PanicPhase::CollectOutcome, "collect_outcome", "collect panic").await; + #[test] + fn not_crash_catches_collect_outcome_panic() { + assert_not_crash_error(PanicPhase::CollectOutcome, "collect_outcome", "collect panic"); } - async fn assert_not_crash_error(phase: PanicPhase, expected_phase: &str, expected_payload: &str) { - let err = run_streaming( - SingleStepSource::new(), - PanicEngine::new(phase), - NoopProperties, - RunConfig::with_max_interactions(1), - ) - .await - .unwrap_err() - .to_string(); + fn assert_not_crash_error(phase: PanicPhase, expected_phase: &str, expected_payload: &str) { + let mut runtime = crate::sim::Runtime::new(crate::seed::DstSeed(0)).expect("runtime"); + let err = runtime + .block_on(run_streaming( + SingleStepSource::new(), + PanicEngine::new(phase), + NoopProperties, + RunConfig::with_max_interactions(1), + )) + .unwrap_err() + .to_string(); assert!(err.contains("[NotCrash]")); assert!(err.contains(expected_phase)); diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index 92c3afb97e4..5463186a8b9 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -7,7 +7,7 @@ //! - [`properties`] for reusable semantic checks, //! - [`seed`] for deterministic seeds, //! - [`workload`] for scenario identifiers, -//! - [`targets`] for executable relational-db / standalone-host adapters. +//! - [`targets`] for the executable relational-db + commitlog adapter. //! //! ## DST principles //! @@ -32,13 +32,6 @@ //! 7. Shared randomness, weighting, and sampling helpers belong in the //! workload strategy module, not in ad hoc target or scenario code. -#[cfg(all(simulation, not(madsim)))] -compile_error!( - "cfg(simulation) enables SpacetimeDB simulation gates, but madsim itself \ - still requires cfg(madsim). Use RUSTFLAGS=\"--cfg madsim\" or ./run_dst.sh; \ - SpacetimeDB crates derive cfg(simulation) from cfg(madsim)." 
-); - /// Logical client/session identifiers shared by workloads and targets. pub mod client; /// Shared run-budget configuration for DST targets. @@ -50,6 +43,8 @@ pub(crate) mod properties; mod schema; /// Stable seed and RNG utilities used to make runs reproducible. pub mod seed; +/// Local executor and deterministic-decision shim. +pub mod sim; /// Concrete simulator targets. pub mod targets; /// Shared workload generators reused by multiple targets. diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index 527ed27f68d..7937350d01b 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -1,14 +1,11 @@ -use std::{ - future::Future, - time::{SystemTime, UNIX_EPOCH}, -}; +use std::time::{SystemTime, UNIX_EPOCH}; use clap::{Args, Parser, Subcommand, ValueEnum}; use spacetimedb_dst::{ config::{CommitlogFaultProfile, RunConfig}, seed::DstSeed, - targets::descriptor::{RelationalDbCommitlogDescriptor, StandaloneHostDescriptor, TargetDescriptor}, - workload::{module_ops::HostScenarioId, table_ops::TableScenarioId}, + targets::descriptor::{RelationalDbCommitlogDescriptor, TargetDescriptor}, + workload::table_ops::TableScenarioId, }; #[derive(Parser, Debug)] @@ -57,7 +54,6 @@ struct RunArgs { #[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] enum TargetKind { RelationalDbCommitlog, - StandaloneHost, } #[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] @@ -65,7 +61,6 @@ enum ScenarioKind { RandomCrud, IndexedRanges, Banking, - HostSmoke, } #[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] @@ -120,10 +115,6 @@ fn run_command(args: RunArgs) -> anyhow::Result<()> { let scenario = map_table_scenario(args.target.scenario)?; run_prepared_target::(seed, scenario, config) } - TargetKind::StandaloneHost => { - let scenario = map_host_scenario(args.target.scenario)?; - run_prepared_target::(seed, scenario, config) - } } } @@ -133,25 +124,12 @@ fn run_prepared_target( config: RunConfig, ) -> anyhow::Result<()> { D::prepare(seed, &scenario, 
&config)?; - run_in_runtime(seed, run_target::(seed, scenario, config)) -} - -#[cfg(all(simulation, madsim))] -fn run_in_runtime(seed: DstSeed, future: F) -> anyhow::Result -where - F: Future>, -{ - let mut runtime = madsim::runtime::Runtime::with_seed_and_config(seed.0, madsim::Config::default()); + let mut runtime = spacetimedb_dst::sim::Runtime::new(seed)?; + // RelationalDB durability still runs on core's production runtime boundary. + // Let those external tasks wake the DST executor while this target is being + // migrated toward a fully local simulator. runtime.set_allow_system_thread(true); - runtime.block_on(future) -} - -#[cfg(not(all(simulation, madsim)))] -fn run_in_runtime(_seed: DstSeed, future: F) -> anyhow::Result -where - F: Future>, -{ - tokio::runtime::Runtime::new()?.block_on(future) + runtime.block_on(run_target::(seed, scenario, config)) } fn map_table_scenario(scenario: ScenarioKind) -> anyhow::Result { @@ -159,14 +137,6 @@ fn map_table_scenario(scenario: ScenarioKind) -> anyhow::Result ScenarioKind::RandomCrud => Ok(TableScenarioId::RandomCrud), ScenarioKind::IndexedRanges => Ok(TableScenarioId::IndexedRanges), ScenarioKind::Banking => Ok(TableScenarioId::Banking), - ScenarioKind::HostSmoke => anyhow::bail!("scenario host-smoke is only valid for --target standalone-host"), - } -} - -fn map_host_scenario(scenario: ScenarioKind) -> anyhow::Result { - match scenario { - ScenarioKind::HostSmoke => Ok(HostScenarioId::HostSmoke), - _ => anyhow::bail!("target standalone-host only supports --scenario host-smoke"), } } diff --git a/crates/dst/src/sim/executor.rs b/crates/dst/src/sim/executor.rs new file mode 100644 index 00000000000..8587b9aaa65 --- /dev/null +++ b/crates/dst/src/sim/executor.rs @@ -0,0 +1,414 @@ +//! Minimal asynchronous executor adapted from madsim's `sim/task` loop. 
+ +use std::{ + collections::BTreeMap, + fmt, + future::Future, + panic::AssertUnwindSafe, + pin::Pin, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Mutex, + }, + task::{Context, Poll}, + thread::{self, Thread}, + time::Duration, +}; + +use futures_util::FutureExt; + +use crate::{seed::DstSeed, sim::Rng}; + +type Runnable = async_task::Runnable; + +/// A unique identifier for a simulated node. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct NodeId(u64); + +impl NodeId { + pub const MAIN: Self = Self(0); +} + +impl fmt::Display for NodeId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +/// A small single-threaded runtime for DST's top-level future. +/// +/// futures are scheduled as runnables, the ready queue +/// is sampled by deterministic RNG, and pending execution without future events +/// is considered a test hang unless external system threads are explicitly +/// allowed for the current target. +pub struct Runtime { + executor: Arc, +} + +impl Runtime { + pub fn new(seed: DstSeed) -> anyhow::Result { + Ok(Self { + executor: Arc::new(Executor::new(seed)), + }) + } + + pub fn block_on(&mut self, future: F) -> F::Output { + self.executor.block_on(future) + } + + /// Allow parking briefly for non-DST runtime threads to wake the root task. + /// + /// This is currently needed by the relational target while durability still + /// uses core's production runtime boundary. 
+ pub fn set_allow_system_thread(&mut self, allowed: bool) { + self.executor.set_allow_system_thread(allowed); + } + + pub fn handle(&self) -> Handle { + Handle { + executor: Arc::clone(&self.executor), + } + } + + pub fn create_node(&self) -> NodeId { + self.handle().create_node() + } + + pub fn pause(&self, node: NodeId) { + self.handle().pause(node); + } + + pub fn resume(&self, node: NodeId) { + self.handle().resume(node); + } + + pub fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.handle().spawn_on(node, future) + } +} + +/// Cloneable access to the simulation executor. +#[derive(Clone)] +pub struct Handle { + executor: Arc, +} + +impl Handle { + pub fn create_node(&self) -> NodeId { + self.executor.create_node() + } + + pub fn pause(&self, node: NodeId) { + self.executor.pause(node); + } + + pub fn resume(&self, node: NodeId) { + self.executor.resume(node); + } + + pub fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.executor.spawn_on(node, future) + } +} + +/// A spawned simulated task. 
+pub struct JoinHandle { + task: async_task::Task, +} + +impl JoinHandle { + pub fn detach(self) { + self.task.detach(); + } +} + +impl Future for JoinHandle { + type Output = T; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + Pin::new(&mut self.task).poll(cx) + } +} + +struct Executor { + queue: Receiver, + sender: Sender, + nodes: Mutex>>, + next_node: std::sync::atomic::AtomicU64, + rng: Mutex, + allow_system_thread: AtomicBool, +} + +impl Executor { + fn new(seed: DstSeed) -> Self { + let queue = Queue::new(); + let mut nodes = BTreeMap::new(); + nodes.insert(NodeId::MAIN, Arc::new(NodeState::default())); + Self { + queue: queue.receiver(), + sender: queue.sender(), + nodes: Mutex::new(nodes), + next_node: std::sync::atomic::AtomicU64::new(1), + rng: Mutex::new(Rng::new(seed)), + allow_system_thread: AtomicBool::new(false), + } + } + + fn set_allow_system_thread(&self, allowed: bool) { + self.allow_system_thread.store(allowed, Ordering::Relaxed); + } + + fn create_node(&self) -> NodeId { + let id = NodeId(self.next_node.fetch_add(1, Ordering::Relaxed)); + self.nodes + .lock() + .expect("nodes poisoned") + .insert(id, Arc::new(NodeState::default())); + id + } + + fn pause(&self, node: NodeId) { + self.node_state(node).paused.store(true, Ordering::Relaxed); + } + + fn resume(&self, node: NodeId) { + let state = self.node_state(node); + state.paused.store(false, Ordering::Relaxed); + + let mut paused = state.paused_queue.lock().expect("paused queue poisoned"); + for runnable in paused.drain(..) 
{ + self.sender.send(runnable); + } + } + + fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.node_state(node); + + let sender = self.sender.clone(); + let (runnable, task) = async_task::Builder::new() + .metadata(node) + .spawn(move |_| future, move |runnable| sender.send(runnable)); + runnable.schedule(); + + JoinHandle { task } + } + + #[track_caller] + fn block_on(&self, future: F) -> F::Output { + let _waiter = WaiterGuard::new(&self.queue, thread::current()); + + let sender = self.sender.clone(); + let (runnable, task) = unsafe { + async_task::Builder::new() + .metadata(NodeId::MAIN) + .spawn_unchecked(move |_| future, move |runnable| sender.send(runnable)) + }; + runnable.schedule(); + + loop { + self.run_all_ready(); + if task.is_finished() { + return task.now_or_never().expect("finished task should resolve"); + } + + if self.allow_system_thread.load(Ordering::Relaxed) { + thread::park_timeout(Duration::from_millis(1)); + } else { + panic!("no runnable tasks; all simulated tasks are blocked"); + } + } + } + + fn run_all_ready(&self) { + while let Some(runnable) = self.queue.try_recv_random(&self.rng) { + let node = *runnable.metadata(); + let state = self.node_state(node); + if state.paused.load(Ordering::Relaxed) { + state.paused_queue.lock().expect("paused queue poisoned").push(runnable); + continue; + } + let result = std::panic::catch_unwind(AssertUnwindSafe(|| runnable.run())); + if let Err(payload) = result { + std::panic::resume_unwind(payload); + } + } + } + + fn node_state(&self, node: NodeId) -> Arc { + self.nodes + .lock() + .expect("nodes poisoned") + .get(&node) + .cloned() + .unwrap_or_else(|| panic!("unknown simulated node {node}")) + } +} + +#[derive(Clone, Default)] +struct NodeState { + paused: Arc, + paused_queue: Arc>>, +} + +pub async fn yield_now() { + YieldNow { yielded: false }.await +} + +struct YieldNow { + yielded: bool, +} + +impl Future for 
YieldNow { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if self.yielded { + Poll::Ready(()) + } else { + self.yielded = true; + cx.waker().wake_by_ref(); + Poll::Pending + } + } +} + +struct WaiterGuard<'a> { + receiver: &'a Receiver, +} + +impl<'a> WaiterGuard<'a> { + fn new(receiver: &'a Receiver, thread: Thread) -> Self { + receiver.set_waiter(Some(thread)); + Self { receiver } + } +} + +impl Drop for WaiterGuard<'_> { + fn drop(&mut self) { + self.receiver.set_waiter(None); + } +} + +struct Queue { + inner: Arc, +} + +#[derive(Clone)] +struct Sender { + inner: Arc, +} + +#[derive(Clone)] +struct Receiver { + inner: Arc, +} + +struct QueueInner { + queue: Mutex>, + waiter: Mutex>, +} + +impl Queue { + fn new() -> Self { + Self { + inner: Arc::new(QueueInner { + queue: Mutex::new(Vec::new()), + waiter: Mutex::new(None), + }), + } + } + + fn sender(&self) -> Sender { + Sender { + inner: self.inner.clone(), + } + } + + fn receiver(&self) -> Receiver { + Receiver { + inner: self.inner.clone(), + } + } +} + +impl Sender { + fn send(&self, runnable: Runnable) { + self.inner.queue.lock().expect("run queue poisoned").push(runnable); + if let Some(thread) = self.inner.waiter.lock().expect("waiter poisoned").as_ref() { + thread.unpark(); + } + } +} + +impl Receiver { + fn set_waiter(&self, thread: Option) { + *self.inner.waiter.lock().expect("waiter poisoned") = thread; + } + + fn try_recv_random(&self, rng: &Mutex) -> Option { + let mut queue = self.inner.queue.lock().expect("run queue poisoned"); + if queue.is_empty() { + return None; + } + let idx = rng.lock().expect("rng poisoned").index(queue.len()); + Some(queue.swap_remove(idx)) + } +} + +#[cfg(test)] +mod tests { + use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }; + + use super::*; + + #[test] + fn paused_node_does_not_run_until_resumed() { + let mut runtime = Runtime::new(DstSeed(1)).unwrap(); + let node = runtime.create_node(); + runtime.pause(node); + 
+ let runs = Arc::new(AtomicUsize::new(0)); + let task_runs = Arc::clone(&runs); + let task = runtime.spawn_on(node, async move { + task_runs.fetch_add(1, Ordering::SeqCst); + 7 + }); + + runtime.block_on(async { + yield_now().await; + }); + assert_eq!(runs.load(Ordering::SeqCst), 0); + + runtime.resume(node); + assert_eq!(runtime.block_on(task), 7); + assert_eq!(runs.load(Ordering::SeqCst), 1); + } + + #[test] + fn handle_can_spawn_onto_node_from_simulated_task() { + let mut runtime = Runtime::new(DstSeed(2)).unwrap(); + let handle = runtime.handle(); + + let value = runtime.block_on(async move { + let node = handle.create_node(); + handle.spawn_on(node, async { 11 }).await + }); + + assert_eq!(value, 11); + } +} diff --git a/crates/dst/src/sim/mod.rs b/crates/dst/src/sim/mod.rs new file mode 100644 index 00000000000..b9d2b682cb9 --- /dev/null +++ b/crates/dst/src/sim/mod.rs @@ -0,0 +1,33 @@ +//! Local simulation shim for the DST crate. +//! +//! This module is deliberately small, but its executor shape follows madsim's: +//! futures are scheduled as runnable tasks and the ready queue is sampled by a +//! deterministic RNG instead of being driven by a package-level async runtime. + +mod executor; +mod rng; + +use std::time::Duration; + +pub use executor::{yield_now, Handle, JoinHandle, NodeId, Runtime}; +pub use rng::Rng; + +use crate::seed::DstSeed; + +pub(crate) use rng::DecisionSource; + +pub(crate) type RuntimeHandle = spacetimedb_core::runtime::Handle; +pub(crate) type RuntimeGuard = spacetimedb_core::runtime::Runtime; + +pub(crate) fn current_handle_or_new_runtime() -> anyhow::Result<(RuntimeHandle, Option)> { + spacetimedb_core::runtime::current_handle_or_new_runtime() +} + +pub(crate) fn advance_time(_duration: Duration) { + // This is a hook, not wall-clock sleep. A future simulator layer can advance + // virtual time here while keeping targets on the same API. 
+} + +pub(crate) fn decision_source(seed: DstSeed) -> DecisionSource { + DecisionSource::new(seed) +} diff --git a/crates/dst/src/sim/rng.rs b/crates/dst/src/sim/rng.rs new file mode 100644 index 00000000000..9b19e527002 --- /dev/null +++ b/crates/dst/src/sim/rng.rs @@ -0,0 +1,74 @@ +use std::sync::atomic::{AtomicU64, Ordering}; + +use crate::seed::DstSeed; + +const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; + +#[derive(Clone, Debug)] +pub struct Rng { + state: u64, +} + +impl Rng { + pub fn new(seed: DstSeed) -> Self { + Self { + state: splitmix64(seed.0), + } + } + + pub fn next_u64(&mut self) -> u64 { + self.state = self.state.wrapping_add(GAMMA); + splitmix64(self.state) + } + + pub fn index(&mut self, len: usize) -> usize { + assert!(len > 0, "len must be non-zero"); + (self.next_u64() as usize) % len + } + + pub fn sample_probability(&mut self, probability: f64) -> bool { + probability_sample(self.next_u64(), probability) + } +} + +#[derive(Debug)] +pub(crate) struct DecisionSource { + state: AtomicU64, +} + +impl DecisionSource { + pub(crate) fn new(seed: DstSeed) -> Self { + Self { + state: AtomicU64::new(splitmix64(seed.0)), + } + } + + pub(crate) fn sample_probability(&self, probability: f64) -> bool { + probability_sample(self.next_u64(), probability) + } + + fn next_u64(&self) -> u64 { + let state = self.state.fetch_add(GAMMA, Ordering::Relaxed); + splitmix64(state) + } +} + +fn probability_sample(value: u64, probability: f64) -> bool { + if probability <= 0.0 { + return false; + } + if probability >= 1.0 { + return true; + } + + // Use the top 53 bits to build an exactly representable f64 in [0, 1). 
+ let unit = (value >> 11) as f64 * (1.0 / ((1u64 << 53) as f64)); + unit < probability +} + +fn splitmix64(mut x: u64) -> u64 { + x = x.wrapping_add(GAMMA); + x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + x ^ (x >> 31) +} diff --git a/crates/dst/src/targets/buggified_repo.rs b/crates/dst/src/targets/buggified_repo.rs index 57796b7d3e9..b34e5027eb0 100644 --- a/crates/dst/src/targets/buggified_repo.rs +++ b/crates/dst/src/targets/buggified_repo.rs @@ -13,7 +13,7 @@ use spacetimedb_commitlog::{ segment::FileLike, }; -use crate::{config::CommitlogFaultProfile, workload::commitlog_ops::DiskFaultSummary}; +use crate::{config::CommitlogFaultProfile, seed::DstSeed, sim, workload::commitlog_ops::DiskFaultSummary}; const INJECTED_DISK_ERROR_PREFIX: &str = "dst injected disk "; @@ -123,10 +123,10 @@ pub(crate) struct BuggifiedRepo { } impl BuggifiedRepo { - pub(crate) fn new(inner: R, config: CommitlogFaultConfig) -> Self { + pub(crate) fn new(inner: R, config: CommitlogFaultConfig, seed: DstSeed) -> Self { Self { inner, - faults: FaultController::new(config), + faults: FaultController::new(config, seed), } } @@ -341,15 +341,17 @@ impl SegmentReader for BuggifiedReader { struct FaultController { config: CommitlogFaultConfig, counters: Arc, + decisions: Arc, armed: Arc, suspended: Arc, } impl FaultController { - fn new(config: CommitlogFaultConfig) -> Self { + fn new(config: CommitlogFaultConfig, seed: DstSeed) -> Self { Self { config, counters: Arc::default(), + decisions: Arc::new(sim::decision_source(seed)), armed: Arc::new(AtomicBool::new(false)), suspended: Arc::default(), } @@ -379,10 +381,7 @@ impl FaultController { } else { Duration::from_millis(1) }; - #[cfg(all(simulation, madsim))] - madsim::time::advance(latency); - #[cfg(not(all(simulation, madsim)))] - let _ = latency; + sim::advance_time(latency); } } @@ -412,15 +411,7 @@ impl FaultController { return false; } - #[cfg(simulation)] - { - 
madsim::buggify::buggify_with_prob(probability) - } - #[cfg(not(simulation))] - { - let _ = probability; - false - } + self.decisions.sample_probability(probability) } fn summary(&self) -> DiskFaultSummary { diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index b1dca7d2fdb..91c522fbd42 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -2,11 +2,7 @@ use std::{future::Future, pin::Pin}; -use crate::{ - config::RunConfig, - seed::DstSeed, - workload::{module_ops::HostScenarioId, table_ops::TableScenarioId}, -}; +use crate::{config::RunConfig, seed::DstSeed, workload::table_ops::TableScenarioId}; /// Descriptor contract: CLI talks to this, not per-target ad hoc handlers. pub trait TargetDescriptor { @@ -118,37 +114,8 @@ fn format_relational_db_commitlog_outcome( outcome.disk_faults.fsync_error, outcome.disk_faults.open_error, outcome.disk_faults.metadata_error, - outcome.runtime.known_tokio_tasks_scheduled, + outcome.runtime.known_runtime_tasks_scheduled, outcome.runtime.durability_actors_started, alive_tasks ) } - -pub struct StandaloneHostDescriptor; - -impl TargetDescriptor for StandaloneHostDescriptor { - const NAME: &'static str = "standalone_host"; - type Scenario = HostScenarioId; - - fn prepare(_seed: DstSeed, _scenario: &Self::Scenario, _config: &RunConfig) -> anyhow::Result<()> { - crate::targets::standalone_host::prepare_generated_run() - } - - fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture { - Box::pin(async move { - let outcome = - crate::targets::standalone_host::run_generated_with_config_and_scenario(seed, scenario, config).await?; - Ok(format!( - "ok target={} seed={} steps={} reducer_calls={} waits={} reopens={} noops={} expected_errors={}", - Self::NAME, - seed.0, - outcome.steps_executed, - outcome.reducer_calls, - outcome.scheduler_waits, - outcome.reopens, - outcome.noops, - outcome.expected_errors - )) - }) - } -} 
diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index a619c1a8be5..52a941ec8f1 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -3,4 +3,3 @@ pub(crate) mod buggified_repo; pub mod descriptor; pub mod relational_db_commitlog; -pub mod standalone_host; diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 100c65ea55f..1bf0e4d2fb4 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -37,6 +37,7 @@ use crate::{ }, schema::{SchemaPlan, SimRow}, seed::DstSeed, + sim, targets::buggified_repo::{is_injected_disk_error_text, BuggifiedRepo, CommitlogFaultConfig}, workload::{ commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary}, @@ -193,7 +194,7 @@ impl RunStats { fn runtime_summary(&self) -> RuntimeSummary { RuntimeSummary { - known_tokio_tasks_scheduled: self.runtime.durability_actors_started, + known_runtime_tasks_scheduled: self.runtime.durability_actors_started, durability_actors_started: self.runtime.durability_actors_started, runtime_alive_tasks: runtime_alive_tasks(), } @@ -213,10 +214,10 @@ struct RelationalDbEngine { last_observed_durable_offset: Option, durability: Arc, durability_opts: spacetimedb_durability::local::Options, - runtime_handle: tokio::runtime::Handle, + runtime_handle: sim::RuntimeHandle, commitlog_repo: StressCommitlogRepo, stats: RunStats, - _runtime_guard: Option, + _runtime_guard: Option, } impl RelationalDbEngine { @@ -994,7 +995,7 @@ impl RelationalDbEngine { .map_err(|err| format!("durability wait for tx offset {target_offset} failed: {err}"))?; } } else if forced { - tokio::task::yield_now().await; + sim::yield_now().await; } self.refresh_observed_durable_offset(forced) } @@ -1386,27 +1387,21 @@ type InMemoryCommitlogDurability = Local; struct RelationalDbBootstrap { db: RelationalDB, - runtime_handle: 
tokio::runtime::Handle, + runtime_handle: sim::RuntimeHandle, commitlog_repo: StressCommitlogRepo, durability: Arc, durability_opts: spacetimedb_durability::local::Options, - runtime_guard: Option, + runtime_guard: Option, } fn bootstrap_relational_db( seed: DstSeed, fault_profile: CommitlogFaultProfile, ) -> anyhow::Result { - let (runtime_handle, runtime_guard) = if let Ok(handle) = tokio::runtime::Handle::try_current() { - (handle, None) - } else { - let runtime = tokio::runtime::Runtime::new()?; - (runtime.handle().clone(), Some(runtime)) - }; + let (runtime_handle, runtime_guard) = sim::current_handle_or_new_runtime()?; let fault_config = CommitlogFaultConfig::for_profile(fault_profile); - configure_simulation_buggify(fault_config.enabled()); - let commitlog_repo = BuggifiedRepo::new(MemoryCommitlogRepo::new(8 * 1024 * 1024), fault_config); + let commitlog_repo = BuggifiedRepo::new(MemoryCommitlogRepo::new(8 * 1024 * 1024), fault_config, seed.fork(702)); let durability_opts = commitlog_stress_options(seed.fork(701)); let durability = Arc::new( InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), runtime_handle.clone(), durability_opts) @@ -1449,23 +1444,9 @@ fn commitlog_stress_options(seed: DstSeed) -> spacetimedb_durability::local::Opt opts } -fn configure_simulation_buggify(enabled: bool) { - #[cfg(simulation)] - { - if enabled { - madsim::buggify::enable(); - } else { - madsim::buggify::disable(); - } - } - #[cfg(not(simulation))] - let _ = enabled; -} - fn runtime_alive_tasks() -> Option { - // The madsim runtime exposes live task metrics on `Runtime`, but the target - // only receives Tokio-compatible handles. Keep this explicit instead of - // reporting madsim-tokio's dummy zero-valued metrics as real data. + // The shim only exposes Tokio-compatible handles today. Keep this explicit + // until the target owns a simulator/runtime that can report live task state. 
None } diff --git a/crates/dst/src/targets/standalone_host.rs b/crates/dst/src/targets/standalone_host.rs deleted file mode 100644 index d7429ef9463..00000000000 --- a/crates/dst/src/targets/standalone_host.rs +++ /dev/null @@ -1,381 +0,0 @@ -//! Standalone host DST target (single scenario, no migration/subscriptions). - -use std::{ - path::PathBuf, - sync::{Arc, OnceLock}, - time::{Instant, SystemTime, UNIX_EPOCH}, -}; - -use bytes::Bytes; -use spacetimedb_client_api::{ - auth::SpacetimeAuth, routes::subscribe::WebSocketOptions, ControlStateReadAccess, ControlStateWriteAccess, - NodeDelegate, -}; -use spacetimedb_client_api_messages::websocket::v1 as ws_v1; -use spacetimedb_core::{ - client::{ClientActorId, ClientConfig, ClientConnection}, - config::CertificateAuthority, - db::{Config as DbConfig, Storage}, - host::FunctionArgs, - messages::control_db::HostType, - util::jobs::JobCores, -}; -use spacetimedb_lib::{ConnectionId, Identity}; -use spacetimedb_paths::{RootDir, SpacetimePaths}; -use spacetimedb_sats::ProductValue; -use spacetimedb_schema::{auto_migrate::MigrationPolicy, def::FunctionVisibility}; -use spacetimedb_standalone::{StandaloneEnv, StandaloneOptions}; -use tracing::trace; - -use crate::{ - client::SessionId, - config::RunConfig, - core::{self, StreamingProperties, TargetEngine}, - seed::DstSeed, - workload::module_ops::{ - HostScenarioId, ModuleInteraction, ModuleReducerSpec, ModuleWorkloadOutcome, ModuleWorkloadSource, - }, -}; - -pub type StandaloneHostOutcome = ModuleWorkloadOutcome; - -pub fn prepare_generated_run() -> anyhow::Result<()> { - let _ = compiled_module()?; - Ok(()) -} - -pub async fn run_generated_with_config_and_scenario( - seed: DstSeed, - scenario: HostScenarioId, - config: RunConfig, -) -> anyhow::Result { - run_once_async(seed, scenario, config).await -} - -async fn run_once_async( - seed: DstSeed, - scenario: HostScenarioId, - config: RunConfig, -) -> anyhow::Result { - let module = compiled_module()?; - let reducers = 
extract_reducer_specs(module.clone()).await?; - let generator = ModuleWorkloadSource::new(seed, scenario, reducers, config.max_interactions_or_default(usize::MAX)); - let engine = StandaloneHostEngine::new(seed, module).await?; - core::run_streaming(generator, engine, NoopHostProperties, config).await -} - -#[derive(Clone)] -struct CompiledModuleInfo { - program_bytes: Bytes, - host_type: HostType, -} - -fn compiled_module() -> anyhow::Result> { - static CACHE: OnceLock> = OnceLock::new(); - if let Some(cached) = CACHE.get() { - return Ok(cached.clone()); - } - let module_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../modules/module-test"); - let (path, host_type) = spacetimedb_cli::build(&module_root, Some(PathBuf::from("src")).as_deref(), true, None)?; - let host_type: HostType = host_type.parse()?; - let program_bytes = std::fs::read(path)?; - let compiled = Arc::new(CompiledModuleInfo { - program_bytes: program_bytes.into(), - host_type, - }); - let _ = CACHE.set(compiled.clone()); - Ok(CACHE.get().expect("cache set or raced").clone()) -} - -async fn extract_reducer_specs(module: Arc) -> anyhow::Result> { - let module_def = spacetimedb_core::host::extract_schema( - module.program_bytes.clone().to_vec().into_boxed_slice(), - module.host_type, - ) - .await?; - Ok(module_def - .reducers() - .filter(|reducer| reducer.visibility == FunctionVisibility::ClientCallable) - .map(|reducer| ModuleReducerSpec { - name: reducer.name.to_string(), - params: reducer - .params - .elements - .iter() - .map(|arg| arg.algebraic_type.clone()) - .collect::>(), - }) - .collect::>()) -} - -struct HostSession { - _env: Arc, - client: ClientConnection, - db_identity: Identity, -} - -struct StandaloneHostEngine { - root_dir: RootDir, - session: Option, - module: Arc, - seed: DstSeed, - session_generation: u64, - step: usize, - reducer_calls: usize, - scheduler_waits: usize, - reopens: usize, - noops: usize, - expected_errors: usize, -} - -impl StandaloneHostEngine { - async 
fn new(seed: DstSeed, module: Arc) -> anyhow::Result { - let root_dir = RootDir(std::env::temp_dir().join(format!( - "spacetimedb-dst-standalone-host-{}-{}-{}", - seed.0, - std::process::id(), - SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos() - ))); - let _ = std::fs::remove_dir_all(&root_dir); - let session = open_session( - &root_dir, - &module, - None, - connection_id_for_session(seed, SessionId::ZERO, 0), - ) - .await - .map_err(anyhow::Error::msg)?; - Ok(Self { - root_dir, - session: Some(session), - module, - seed, - session_generation: 1, - step: 0, - reducer_calls: 0, - scheduler_waits: 0, - reopens: 0, - noops: 0, - expected_errors: 0, - }) - } - - async fn execute(&mut self, interaction: &ModuleInteraction) -> Result<(), String> { - self.step = self.step.saturating_add(1); - match interaction { - ModuleInteraction::CallReducer { session, reducer, args } => { - if *session != SessionId::ZERO { - return Err(format!("standalone-host target has no session for {session}")); - } - self.reducer_calls = self.reducer_calls.saturating_add(1); - let request_id = (self.step as u32).saturating_sub(1); - let product = ProductValue::from_iter(args.iter().cloned()); - let payload = spacetimedb_sats::bsatn::to_vec(&product).map_err(|e| e.to_string())?; - let res = self - .session - .as_mut() - .ok_or_else(|| "host session missing".to_string())? 
- .client - .call_reducer( - reducer, - FunctionArgs::Bsatn(payload.into()), - request_id, - Instant::now(), - ws_v1::CallReducerFlags::FullUpdate, - ) - .await; - match res { - Ok(_) => Ok(()), - Err(err) => { - let msg = err.to_string(); - if is_expected_error(reducer, &msg) { - self.expected_errors = self.expected_errors.saturating_add(1); - Ok(()) - } else { - Err(format!("unexpected reducer error reducer={reducer}: {msg}")) - } - } - } - } - ModuleInteraction::WaitScheduled { millis } => { - self.scheduler_waits = self.scheduler_waits.saturating_add(1); - tokio::time::sleep(std::time::Duration::from_millis(*millis)).await; - Ok(()) - } - ModuleInteraction::CloseReopen => { - self.reopens = self.reopens.saturating_add(1); - let db_identity = self - .session - .as_ref() - .ok_or_else(|| "host session missing".to_string())? - .db_identity; - let old = self.session.take(); - drop(old); - let connection_id = connection_id_for_session(self.seed, SessionId::ZERO, self.session_generation); - self.session_generation = self.session_generation.saturating_add(1); - self.session = - Some(open_session(&self.root_dir, &self.module, Some(db_identity), connection_id).await?); - Ok(()) - } - ModuleInteraction::NoOp => { - self.noops = self.noops.saturating_add(1); - Ok(()) - } - } - } - - fn outcome(&self) -> StandaloneHostOutcome { - StandaloneHostOutcome { - steps_executed: self.step, - reducer_calls: self.reducer_calls, - scheduler_waits: self.scheduler_waits, - reopens: self.reopens, - noops: self.noops, - expected_errors: self.expected_errors, - } - } -} - -impl TargetEngine for StandaloneHostEngine { - type Observation = (); - type Outcome = StandaloneHostOutcome; - type Error = String; - - #[allow(clippy::manual_async_fn)] - fn execute_interaction<'a>( - &'a mut self, - interaction: &'a ModuleInteraction, - ) -> impl std::future::Future> + 'a { - async move { - trace!(?interaction, "standalone_host interaction"); - self.execute(interaction).await - } - } - - fn 
finish(&mut self) {} - - #[allow(clippy::manual_async_fn)] - fn collect_outcome<'a>(&'a mut self) -> impl std::future::Future> + 'a { - async move { Ok(self.outcome()) } - } -} - -struct NoopHostProperties; - -impl StreamingProperties for NoopHostProperties { - fn observe( - &mut self, - _engine: &StandaloneHostEngine, - _interaction: &ModuleInteraction, - _observation: &(), - ) -> Result<(), String> { - Ok(()) - } - - fn finish(&mut self, _engine: &StandaloneHostEngine, _outcome: &StandaloneHostOutcome) -> Result<(), String> { - Ok(()) - } -} - -fn is_expected_error(_reducer: &str, msg: &str) -> bool { - msg.contains("permission denied") -} - -fn connection_id_for_session(seed: DstSeed, session: SessionId, handle_generation: u64) -> ConnectionId { - let base = 1_000u64 - .saturating_add((session.client.as_u32() as u64).saturating_mul(1_000_000)) - .saturating_add((session.generation as u64).saturating_mul(10_000)) - .saturating_add(handle_generation.saturating_mul(2)); - let high = seed.fork(base).0 as u128; - let low = seed.fork(base.saturating_add(1)).0 as u128; - let id = (high << 64) | low; - ConnectionId::from_u128(id.max(1)) -} - -async fn open_session( - root_dir: &RootDir, - module: &CompiledModuleInfo, - maybe_db_identity: Option, - connection_id: ConnectionId, -) -> Result { - let paths = SpacetimePaths::from_root_dir(root_dir); - let certs = CertificateAuthority::in_cli_config_dir(&paths.cli_config_dir); - let env = StandaloneEnv::init( - StandaloneOptions { - db_config: DbConfig { - storage: Storage::Disk, - page_pool_max_size: None, - }, - websocket: WebSocketOptions::default(), - v8_heap_policy: Default::default(), - }, - &certs, - paths.data_dir.into(), - JobCores::without_pinned_cores(), - ) - .await - .map_err(|e| format!("standalone init failed: {e:#}"))?; - - let caller_identity = Identity::ZERO; - let db_identity = match maybe_db_identity { - Some(identity) => identity, - None => { - SpacetimeAuth::alloc(&env) - .await - .map_err(|e| 
format!("db identity allocation failed: {e:#?}"))? - .claims - .identity - } - }; - - if env - .get_database_by_identity(&db_identity) - .await - .map_err(|e| format!("database lookup failed: {e:#}"))? - .is_none() - { - env.publish_database( - &caller_identity, - spacetimedb_client_api::DatabaseDef { - database_identity: db_identity, - program_bytes: module.program_bytes.clone(), - num_replicas: None, - host_type: module.host_type, - parent: None, - organization: None, - }, - MigrationPolicy::Compatible, - ) - .await - .map_err(|e| format!("publish module failed: {e:#}"))?; - } - - let database = env - .get_database_by_identity(&db_identity) - .await - .map_err(|e| format!("database lookup after publish failed: {e:#}"))? - .ok_or_else(|| "database not found after publish".to_string())?; - let replica = env - .get_leader_replica_by_database(database.id) - .await - .ok_or_else(|| "leader replica not found".to_string())?; - let host = env - .leader(database.id) - .await - .map_err(|e| format!("leader host unavailable: {e:#}"))?; - let module_rx = host - .module_watcher() - .await - .map_err(|e| format!("module watcher failed: {e:#}"))?; - let client_id = ClientActorId { - identity: caller_identity, - connection_id, - name: env.client_actor_index().next_client_name(), - }; - let client = ClientConnection::dummy(client_id, ClientConfig::for_test(), replica.id, module_rx); - Ok(HostSession { - _env: env, - client, - db_identity, - }) -} diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs index cdaac71adb8..78382fb6372 100644 --- a/crates/dst/src/workload/commitlog_ops/types.rs +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -125,7 +125,7 @@ pub struct TransactionSummary { #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct RuntimeSummary { - pub known_tokio_tasks_scheduled: usize, + pub known_runtime_tasks_scheduled: usize, pub durability_actors_started: usize, pub runtime_alive_tasks: Option, } 
diff --git a/crates/dst/src/workload/mod.rs b/crates/dst/src/workload/mod.rs index ab6eb8c0b17..52482e737f1 100644 --- a/crates/dst/src/workload/mod.rs +++ b/crates/dst/src/workload/mod.rs @@ -1,6 +1,5 @@ //! Shared workload generators reused by multiple DST targets. pub mod commitlog_ops; -pub mod module_ops; pub(crate) mod strategy; pub mod table_ops; diff --git a/crates/dst/src/workload/module_ops/generation.rs b/crates/dst/src/workload/module_ops/generation.rs deleted file mode 100644 index c3f8a2e8e77..00000000000 --- a/crates/dst/src/workload/module_ops/generation.rs +++ /dev/null @@ -1,128 +0,0 @@ -use crate::{ - client::SessionId, - core::WorkloadSource, - schema::generate_value_for_type, - seed::{DstRng, DstSeed}, - workload::strategy::{Index, Strategy, Weighted}, -}; - -use super::{HostScenarioId, ModuleInteraction, ModuleReducerSpec}; - -const MAX_REGEN_ATTEMPTS: usize = 16; - -#[derive(Clone, Copy, Debug)] -enum ActionKind { - Reducer, - Wait, - Reopen, -} - -/// Deterministic source for standalone-host interactions. 
-pub(crate) struct ModuleWorkloadSource { - scenario: HostScenarioId, - reducers: Vec, - rng: DstRng, - target_interactions: usize, - emitted: usize, -} - -impl ModuleWorkloadSource { - pub fn new( - seed: DstSeed, - scenario: HostScenarioId, - reducers: Vec, - target_interactions: usize, - ) -> Self { - Self { - scenario, - reducers, - rng: seed.fork(300).rng(), - target_interactions, - emitted: 0, - } - } - - pub fn request_finish(&mut self) { - self.target_interactions = self.emitted; - } - - fn choose_action(&mut self) -> ActionKind { - match self.scenario { - HostScenarioId::HostSmoke => Weighted::new(vec![ - (85, ActionKind::Reducer), - (10, ActionKind::Wait), - (5, ActionKind::Reopen), - ]) - .sample(&mut self.rng), - } - } - - fn generate_reducer_interaction(&mut self) -> Option { - if self.reducers.is_empty() { - return None; - } - let idx = Index::new(self.reducers.len()).sample(&mut self.rng); - let spec = &self.reducers[idx]; - let mut args = Vec::with_capacity(spec.params.len()); - for (arg_index, ty) in spec.params.iter().enumerate() { - if !supports_generation(ty) { - return None; - } - args.push(generate_value_for_type(&mut self.rng, ty, arg_index)); - } - Some(ModuleInteraction::CallReducer { - session: SessionId::ZERO, - reducer: spec.name.clone(), - args, - }) - } - - fn generate_next(&mut self) -> ModuleInteraction { - for _ in 0..MAX_REGEN_ATTEMPTS { - let next = match self.choose_action() { - ActionKind::Reducer => self.generate_reducer_interaction(), - ActionKind::Wait => Some(ModuleInteraction::WaitScheduled { millis: 1_200 }), - ActionKind::Reopen => Some(ModuleInteraction::CloseReopen), - }; - if let Some(next) = next { - return next; - } - } - ModuleInteraction::NoOp - } -} - -fn supports_generation(ty: &spacetimedb_sats::AlgebraicType) -> bool { - use spacetimedb_sats::AlgebraicType; - matches!( - ty, - AlgebraicType::Bool - | AlgebraicType::I8 - | AlgebraicType::U8 - | AlgebraicType::I16 - | AlgebraicType::U16 - | AlgebraicType::I32 - | 
AlgebraicType::U32 - | AlgebraicType::I64 - | AlgebraicType::U64 - | AlgebraicType::I128 - | AlgebraicType::U128 - | AlgebraicType::String - ) -} - -impl WorkloadSource for ModuleWorkloadSource { - type Interaction = ModuleInteraction; - - fn next_interaction(&mut self) -> Option { - if self.emitted >= self.target_interactions { - return None; - } - self.emitted += 1; - Some(self.generate_next()) - } - - fn request_finish(&mut self) { - Self::request_finish(self); - } -} diff --git a/crates/dst/src/workload/module_ops/mod.rs b/crates/dst/src/workload/module_ops/mod.rs deleted file mode 100644 index a2e20cd4d12..00000000000 --- a/crates/dst/src/workload/module_ops/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Workload for standalone host/module testing. - -mod generation; -mod types; - -pub(crate) use generation::ModuleWorkloadSource; -pub use types::{HostScenarioId, ModuleInteraction, ModuleReducerSpec, ModuleWorkloadOutcome}; diff --git a/crates/dst/src/workload/module_ops/types.rs b/crates/dst/src/workload/module_ops/types.rs deleted file mode 100644 index 77d063a3b2a..00000000000 --- a/crates/dst/src/workload/module_ops/types.rs +++ /dev/null @@ -1,43 +0,0 @@ -use spacetimedb_sats::AlgebraicType; - -use crate::client::SessionId; - -/// Single v1 scenario for standalone host target. -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub enum HostScenarioId { - #[default] - HostSmoke, -} - -/// Reducer metadata used by the typed argument generator. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct ModuleReducerSpec { - pub name: String, - pub params: Vec, -} - -/// One standalone-host interaction. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum ModuleInteraction { - CallReducer { - session: SessionId, - reducer: String, - args: Vec, - }, - WaitScheduled { - millis: u64, - }, - CloseReopen, - NoOp, -} - -/// Run summary for standalone-host target. 
-#[derive(Clone, Debug, Eq, PartialEq)] -pub struct ModuleWorkloadOutcome { - pub steps_executed: usize, - pub reducer_calls: usize, - pub scheduler_waits: usize, - pub reopens: usize, - pub noops: usize, - pub expected_errors: usize, -} diff --git a/crates/dst/tests/madsim_tcp.rs b/crates/dst/tests/madsim_tcp.rs deleted file mode 100644 index d5e587d9fc8..00000000000 --- a/crates/dst/tests/madsim_tcp.rs +++ /dev/null @@ -1,39 +0,0 @@ -#![cfg(all(simulation, madsim))] - -use std::{net::SocketAddr, sync::Arc}; - -use tokio::{ - io::{AsyncReadExt, AsyncWriteExt}, - sync::Barrier, -}; - -#[test] -fn tcp_round_trip_over_madsim_tokio() { - let runtime = madsim::runtime::Runtime::new(); - let server_addr: SocketAddr = "10.0.0.1:1".parse().unwrap(); - let client_addr: SocketAddr = "10.0.0.2:1".parse().unwrap(); - - let server = runtime.create_node().ip(server_addr.ip()).build(); - let client = runtime.create_node().ip(client_addr.ip()).build(); - let ready = Arc::new(Barrier::new(2)); - - let server_ready = ready.clone(); - let server_task = server.spawn(async move { - let listener = tokio::net::TcpListener::bind(server_addr).await.unwrap(); - server_ready.wait().await; - let (mut stream, _) = listener.accept().await.unwrap(); - stream.write_all(b"pong").await.unwrap(); - stream.flush().await.unwrap(); - }); - - let client_task = client.spawn(async move { - ready.wait().await; - let mut stream = tokio::net::TcpStream::connect(server_addr).await.unwrap(); - let mut response = [0; 4]; - stream.read_exact(&mut response).await.unwrap(); - assert_eq!(&response, b"pong"); - }); - - runtime.block_on(server_task).unwrap(); - runtime.block_on(client_task).unwrap(); -} diff --git a/crates/durability/Cargo.toml b/crates/durability/Cargo.toml index 02f23643664..0ea8022fcbe 100644 --- a/crates/durability/Cargo.toml +++ b/crates/durability/Cargo.toml @@ -23,13 +23,13 @@ spacetimedb-fs-utils.workspace = true spacetimedb-paths.workspace = true spacetimedb-sats.workspace = true 
thiserror.workspace = true -tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } +tokio.workspace = true tracing.workspace = true [dev-dependencies] spacetimedb-commitlog = { workspace = true, features = ["test"] } tempfile.workspace = true -tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } +tokio.workspace = true [lints] workspace = true diff --git a/crates/io/Cargo.toml b/crates/io/Cargo.toml index 02b6482302f..e6cfc9a14f5 100644 --- a/crates/io/Cargo.toml +++ b/crates/io/Cargo.toml @@ -7,7 +7,7 @@ license-file = "LICENSE" description = "Filesystem and network IO facade for SpacetimeDB crates" [dependencies] -tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } +tokio.workspace = true [lints] workspace = true diff --git a/crates/io/src/lib.rs b/crates/io/src/lib.rs index 50179e7221f..f00cdf90b3f 100644 --- a/crates/io/src/lib.rs +++ b/crates/io/src/lib.rs @@ -1,8 +1,7 @@ //! Narrow facade for SpacetimeDB-owned async IO boundaries. //! -//! Production builds use Tokio through the `madsim-tokio` compatibility crate. -//! Simulation builds use the simulator implementations exposed by that same -//! compatibility crate. +//! This crate currently re-exports the Tokio filesystem, IO, and network APIs +//! that SpacetimeDB code is allowed to depend on directly. //! //! This crate is intentionally small. It is a migration point for filesystem and //! 
network APIs reached by deterministic simulation tests, not a general runtime From f5197c4d2fb498d35d4659173137553bd94af203 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 5 May 2026 14:04:51 +0530 Subject: [PATCH 28/74] improved simulator --- Cargo.lock | 1 + crates/dst/Cargo.toml | 1 + crates/dst/src/main.rs | 18 +- .../buggified_repo.rs => sim/commitlog.rs} | 82 +++-- crates/dst/src/sim/executor.rs | 138 +++++++-- crates/dst/src/sim/mod.rs | 8 +- crates/dst/src/sim/rng.rs | 289 +++++++++++++++++- crates/dst/src/sim/system_thread.rs | 64 ++++ crates/dst/src/sim/time.rs | 272 +++++++++++++++++ crates/dst/src/targets/mod.rs | 1 - .../src/targets/relational_db_commitlog.rs | 29 +- 11 files changed, 834 insertions(+), 69 deletions(-) rename crates/dst/src/{targets/buggified_repo.rs => sim/commitlog.rs} (86%) create mode 100644 crates/dst/src/sim/system_thread.rs create mode 100644 crates/dst/src/sim/time.rs diff --git a/Cargo.lock b/Cargo.lock index c4c53445e5d..c973141026c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8313,6 +8313,7 @@ dependencies = [ "async-task", "clap 4.5.50", "futures-util", + "libc", "spacetimedb-commitlog", "spacetimedb-core", "spacetimedb-datastore", diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index 1518cda9195..d425f3d977f 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -19,6 +19,7 @@ anyhow.workspace = true async-task = "4.4" clap.workspace = true futures-util.workspace = true +libc = "0.2" spacetimedb-datastore = { workspace = true, features = ["test"] } spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.2.0" } spacetimedb-commitlog = { workspace = true, features = ["test"] } diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index 7937350d01b..01d0961177c 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -122,14 +122,18 @@ fn run_prepared_target( seed: DstSeed, scenario: D::Scenario, config: RunConfig, -) -> anyhow::Result<()> { +) -> 
anyhow::Result<()> +where + D: 'static, + D::Scenario: Send + 'static, +{ D::prepare(seed, &scenario, &config)?; - let mut runtime = spacetimedb_dst::sim::Runtime::new(seed)?; - // RelationalDB durability still runs on core's production runtime boundary. - // Let those external tasks wake the DST executor while this target is being - // migrated toward a fully local simulator. - runtime.set_allow_system_thread(true); - runtime.block_on(run_target::(seed, scenario, config)) + std::thread::spawn(move || { + let mut runtime = spacetimedb_dst::sim::Runtime::new(seed)?; + runtime.block_on(run_target::(seed, scenario, config)) + }) + .join() + .unwrap_or_else(|payload| std::panic::resume_unwind(payload)) } fn map_table_scenario(scenario: ScenarioKind) -> anyhow::Result { diff --git a/crates/dst/src/targets/buggified_repo.rs b/crates/dst/src/sim/commitlog.rs similarity index 86% rename from crates/dst/src/targets/buggified_repo.rs rename to crates/dst/src/sim/commitlog.rs index b34e5027eb0..07d8b245d39 100644 --- a/crates/dst/src/targets/buggified_repo.rs +++ b/crates/dst/src/sim/commitlog.rs @@ -1,3 +1,5 @@ +//! Commitlog storage fault-injection support for DST targets. 
+ use std::{ fmt, io::{self, BufRead, Read, Seek, Write}, @@ -13,7 +15,7 @@ use spacetimedb_commitlog::{ segment::FileLike, }; -use crate::{config::CommitlogFaultProfile, seed::DstSeed, sim, workload::commitlog_ops::DiskFaultSummary}; +use crate::{config::CommitlogFaultProfile, seed::DstSeed, sim}; const INJECTED_DISK_ERROR_PREFIX: &str = "dst injected disk "; @@ -110,6 +112,20 @@ impl CommitlogFaultConfig { } } +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub(crate) struct CommitlogFaultSummary { + pub(crate) profile: CommitlogFaultProfile, + pub(crate) latency: usize, + pub(crate) short_read: usize, + pub(crate) short_write: usize, + pub(crate) read_error: usize, + pub(crate) write_error: usize, + pub(crate) flush_error: usize, + pub(crate) fsync_error: usize, + pub(crate) open_error: usize, + pub(crate) metadata_error: usize, +} + /// DST-only repo wrapper that makes the in-memory commitlog backend behave less like RAM. /// /// Faults stay within normal file API semantics: calls may take deterministic simulated time, @@ -117,12 +133,12 @@ impl CommitlogFaultConfig { /// The wrapper deliberately avoids corruption or crash-style partial persistence; those need a /// stronger durability model before we enable them. 
#[derive(Clone, Debug)] -pub(crate) struct BuggifiedRepo { +pub(crate) struct FaultableRepo { inner: R, faults: FaultController, } -impl BuggifiedRepo { +impl FaultableRepo { pub(crate) fn new(inner: R, config: CommitlogFaultConfig, seed: DstSeed) -> Self { Self { inner, @@ -134,7 +150,7 @@ impl BuggifiedRepo { self.faults.enable(); } - pub(crate) fn fault_summary(&self) -> DiskFaultSummary { + pub(crate) fn fault_summary(&self) -> CommitlogFaultSummary { self.faults.summary() } @@ -143,22 +159,22 @@ impl BuggifiedRepo { } } -impl fmt::Display for BuggifiedRepo { +impl fmt::Display for FaultableRepo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}+buggified({})", self.inner, self.faults.config.profile) + write!(f, "{}+faultable({})", self.inner, self.faults.config.profile) } } -impl Repo for BuggifiedRepo { - type SegmentWriter = BuggifiedSegment; - type SegmentReader = BuggifiedReader; +impl Repo for FaultableRepo { + type SegmentWriter = FaultableSegment; + type SegmentReader = FaultableReader; fn create_segment(&self, offset: u64) -> io::Result { self.faults.maybe_disk_latency(); self.faults.maybe_error(FaultKind::Open)?; self.inner .create_segment(offset) - .map(|inner| BuggifiedSegment::new(inner, self.faults.clone())) + .map(|inner| FaultableSegment::new(inner, self.faults.clone())) } fn open_segment_reader(&self, offset: u64) -> io::Result { @@ -166,7 +182,7 @@ impl Repo for BuggifiedRepo { self.faults.maybe_error(FaultKind::Open)?; self.inner .open_segment_reader(offset) - .map(|inner| BuggifiedReader::new(inner, self.faults.clone())) + .map(|inner| FaultableReader::new(inner, self.faults.clone())) } fn open_segment_writer(&self, offset: u64) -> io::Result { @@ -174,7 +190,7 @@ impl Repo for BuggifiedRepo { self.faults.maybe_error(FaultKind::Open)?; self.inner .open_segment_writer(offset) - .map(|inner| BuggifiedSegment::new(inner, self.faults.clone())) + .map(|inner| FaultableSegment::new(inner, self.faults.clone())) } fn 
segment_file_path(&self, offset: u64) -> Option { @@ -218,20 +234,20 @@ impl Repo for BuggifiedRepo { } } -impl RepoWithoutLockFile for BuggifiedRepo {} +impl RepoWithoutLockFile for FaultableRepo {} -pub(crate) struct BuggifiedSegment { +pub(crate) struct FaultableSegment { inner: S, faults: FaultController, } -impl BuggifiedSegment { +impl FaultableSegment { fn new(inner: S, faults: FaultController) -> Self { Self { inner, faults } } } -impl Read for BuggifiedSegment { +impl Read for FaultableSegment { fn read(&mut self, buf: &mut [u8]) -> io::Result { self.faults.maybe_disk_latency(); self.faults.maybe_error(FaultKind::Read)?; @@ -240,7 +256,7 @@ impl Read for BuggifiedSegment { } } -impl Write for BuggifiedSegment { +impl Write for FaultableSegment { fn write(&mut self, buf: &[u8]) -> io::Result { self.faults.maybe_disk_latency(); self.faults.maybe_error(FaultKind::Write)?; @@ -255,14 +271,14 @@ impl Write for BuggifiedSegment { } } -impl Seek for BuggifiedSegment { +impl Seek for FaultableSegment { fn seek(&mut self, pos: io::SeekFrom) -> io::Result { self.faults.maybe_disk_latency(); self.inner.seek(pos) } } -impl SegmentLen for BuggifiedSegment { +impl SegmentLen for FaultableSegment { fn segment_len(&mut self) -> io::Result { self.faults.maybe_disk_latency(); self.faults.maybe_error(FaultKind::Metadata)?; @@ -270,7 +286,7 @@ impl SegmentLen for BuggifiedSegment { } } -impl FileLike for BuggifiedSegment { +impl FileLike for FaultableSegment { fn fsync(&mut self) -> io::Result<()> { self.faults.maybe_disk_latency(); self.faults.maybe_error(FaultKind::Fsync)?; @@ -284,18 +300,18 @@ impl FileLike for BuggifiedSegment { } } -pub(crate) struct BuggifiedReader { +pub(crate) struct FaultableReader { inner: S, faults: FaultController, } -impl BuggifiedReader { +impl FaultableReader { fn new(inner: S, faults: FaultController) -> Self { Self { inner, faults } } } -impl Read for BuggifiedReader { +impl Read for FaultableReader { fn read(&mut self, buf: &mut [u8]) -> 
io::Result { self.faults.maybe_disk_latency(); self.faults.maybe_error(FaultKind::Read)?; @@ -304,7 +320,7 @@ impl Read for BuggifiedReader { } } -impl BufRead for BuggifiedReader { +impl BufRead for FaultableReader { fn fill_buf(&mut self) -> io::Result<&[u8]> { self.faults.maybe_disk_latency(); self.faults.maybe_error(FaultKind::Read)?; @@ -316,14 +332,14 @@ impl BufRead for BuggifiedReader { } } -impl Seek for BuggifiedReader { +impl Seek for FaultableReader { fn seek(&mut self, pos: io::SeekFrom) -> io::Result { self.faults.maybe_disk_latency(); self.inner.seek(pos) } } -impl SegmentLen for BuggifiedReader { +impl SegmentLen for FaultableReader { fn segment_len(&mut self) -> io::Result { self.faults.maybe_disk_latency(); self.faults.maybe_error(FaultKind::Metadata)?; @@ -331,7 +347,7 @@ impl SegmentLen for BuggifiedReader { } } -impl SegmentReader for BuggifiedReader { +impl SegmentReader for FaultableReader { fn sealed(&self) -> bool { self.inner.sealed() } @@ -342,6 +358,7 @@ struct FaultController { config: CommitlogFaultConfig, counters: Arc, decisions: Arc, + time: Option, armed: Arc, suspended: Arc, } @@ -352,6 +369,7 @@ impl FaultController { config, counters: Arc::default(), decisions: Arc::new(sim::decision_source(seed)), + time: sim::time::try_current_handle(), armed: Arc::new(AtomicBool::new(false)), suspended: Arc::default(), } @@ -381,7 +399,11 @@ impl FaultController { } else { Duration::from_millis(1) }; - sim::advance_time(latency); + if let Some(time) = &self.time { + time.advance(latency); + } else { + sim::advance_time(latency); + } } } @@ -414,8 +436,8 @@ impl FaultController { self.decisions.sample_probability(probability) } - fn summary(&self) -> DiskFaultSummary { - DiskFaultSummary { + fn summary(&self) -> CommitlogFaultSummary { + CommitlogFaultSummary { profile: self.config.profile, latency: self.counters.latency.load(Ordering::Relaxed) as usize, short_read: self.counters.short_read.load(Ordering::Relaxed) as usize, diff --git 
a/crates/dst/src/sim/executor.rs b/crates/dst/src/sim/executor.rs index 8587b9aaa65..394a56829fe 100644 --- a/crates/dst/src/sim/executor.rs +++ b/crates/dst/src/sim/executor.rs @@ -17,7 +17,13 @@ use std::{ use futures_util::FutureExt; -use crate::{seed::DstSeed, sim::Rng}; +use crate::{ + seed::DstSeed, + sim::rng::{enter_rng_context, DeterminismLog}, + sim::system_thread::enter_simulation_thread, + sim::time::{enter_time_context, TimeHandle}, + sim::Rng, +}; type Runnable = async_task::Runnable; @@ -39,8 +45,7 @@ impl fmt::Display for NodeId { /// /// futures are scheduled as runnables, the ready queue /// is sampled by deterministic RNG, and pending execution without future events -/// is considered a test hang unless external system threads are explicitly -/// allowed for the current target. +/// is considered a test hang. pub struct Runtime { executor: Arc, } @@ -56,12 +61,8 @@ impl Runtime { self.executor.block_on(future) } - /// Allow parking briefly for non-DST runtime threads to wake the root task. - /// - /// This is currently needed by the relational target while durability still - /// uses core's production runtime boundary. - pub fn set_allow_system_thread(&mut self, allowed: bool) { - self.executor.set_allow_system_thread(allowed); + pub fn elapsed(&self) -> Duration { + self.executor.elapsed() } pub fn handle(&self) -> Handle { @@ -89,6 +90,51 @@ impl Runtime { { self.handle().spawn_on(node, future) } + + /// Run a future twice with the same seed and fail if simulator choices diverge. + pub fn check_determinism(seed: DstSeed, make_future: fn() -> F) -> F::Output + where + F: Future + 'static, + F::Output: Send + 'static, + { + Self::check_determinism_with(seed, make_future) + } + + /// Run a future twice with the same seed and fail if simulator choices diverge. 
+ pub fn check_determinism_with(seed: DstSeed, make_future: M) -> F::Output + where + M: Fn() -> F + Clone + Send + 'static, + F: Future + 'static, + F::Output: Send + 'static, + { + let first = make_future.clone(); + let log = thread::spawn(move || { + let mut runtime = Runtime::new(seed).expect("failed to create DST runtime"); + runtime.executor.enable_determinism_log(); + runtime.block_on(first()); + runtime + .executor + .take_determinism_log() + .expect("determinism log should be enabled") + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap(); + + thread::spawn(move || { + let mut runtime = Runtime::new(seed).expect("failed to create DST runtime"); + runtime.executor.enable_determinism_check(log); + let output = runtime.block_on(make_future()); + runtime + .executor + .finish_determinism_check() + .unwrap_or_else(|err| panic!("{err}")); + output + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap() + } } /// Cloneable access to the simulation executor. @@ -138,13 +184,18 @@ impl Future for JoinHandle { } } +fn panic_with_seed(seed: DstSeed, payload: Box) -> ! 
{ + eprintln!("note: run with --seed {} to reproduce this error", seed.0); + std::panic::resume_unwind(payload); +} + struct Executor { queue: Receiver, sender: Sender, nodes: Mutex>>, next_node: std::sync::atomic::AtomicU64, - rng: Mutex, - allow_system_thread: AtomicBool, + rng: Arc>, + time: TimeHandle, } impl Executor { @@ -157,13 +208,29 @@ impl Executor { sender: queue.sender(), nodes: Mutex::new(nodes), next_node: std::sync::atomic::AtomicU64::new(1), - rng: Mutex::new(Rng::new(seed)), - allow_system_thread: AtomicBool::new(false), + rng: Arc::new(Mutex::new(Rng::new(seed))), + time: TimeHandle::new(), } } - fn set_allow_system_thread(&self, allowed: bool) { - self.allow_system_thread.store(allowed, Ordering::Relaxed); + fn elapsed(&self) -> Duration { + self.time.now() + } + + fn enable_determinism_log(&self) { + self.rng.lock().expect("sim rng poisoned").enable_determinism_log(); + } + + fn enable_determinism_check(&self, log: DeterminismLog) { + self.rng.lock().expect("sim rng poisoned").enable_determinism_check(log); + } + + fn take_determinism_log(&self) -> Option { + self.rng.lock().expect("sim rng poisoned").take_determinism_log() + } + + fn finish_determinism_check(&self) -> Result<(), String> { + self.rng.lock().expect("sim rng poisoned").finish_determinism_check() } fn create_node(&self) -> NodeId { @@ -207,6 +274,9 @@ impl Executor { #[track_caller] fn block_on(&self, future: F) -> F::Output { + let _system_thread_context = enter_simulation_thread(); + let _rng_context = enter_rng_context(Arc::clone(&self.rng)); + let _time_context = enter_time_context(self.time.clone()); let _waiter = WaiterGuard::new(&self.queue, thread::current()); let sender = self.sender.clone(); @@ -223,11 +293,11 @@ impl Executor { return task.now_or_never().expect("finished task should resolve"); } - if self.allow_system_thread.load(Ordering::Relaxed) { - thread::park_timeout(Duration::from_millis(1)); - } else { - panic!("no runnable tasks; all simulated tasks are 
blocked"); + if self.time.wake_next_timer() { + continue; } + + panic!("no runnable tasks; all simulated tasks are blocked"); } } @@ -370,7 +440,7 @@ impl Receiver { #[cfg(test)] mod tests { use std::sync::{ - atomic::{AtomicUsize, Ordering}, + atomic::{AtomicBool, AtomicUsize, Ordering}, Arc, }; @@ -411,4 +481,32 @@ mod tests { assert_eq!(value, 11); } + + #[test] + fn check_determinism_runs_future_twice() { + static CALLS: AtomicUsize = AtomicUsize::new(0); + CALLS.store(0, Ordering::SeqCst); + + let value = Runtime::check_determinism(DstSeed(3), || async { + CALLS.fetch_add(1, Ordering::SeqCst); + yield_now().await; + 13 + }); + + assert_eq!(value, 13); + assert_eq!(CALLS.load(Ordering::SeqCst), 2); + } + + #[test] + #[should_panic(expected = "non-determinism detected")] + fn check_determinism_rejects_different_scheduler_sequence() { + static FIRST_RUN: AtomicBool = AtomicBool::new(true); + FIRST_RUN.store(true, Ordering::SeqCst); + + Runtime::check_determinism(DstSeed(4), || async { + if FIRST_RUN.swap(false, Ordering::SeqCst) { + yield_now().await; + } + }); + } } diff --git a/crates/dst/src/sim/mod.rs b/crates/dst/src/sim/mod.rs index b9d2b682cb9..c64232f8387 100644 --- a/crates/dst/src/sim/mod.rs +++ b/crates/dst/src/sim/mod.rs @@ -4,8 +4,11 @@ //! futures are scheduled as runnable tasks and the ready queue is sampled by a //! deterministic RNG instead of being driven by a package-level async runtime. +pub(crate) mod commitlog; mod executor; mod rng; +mod system_thread; +pub mod time; use std::time::Duration; @@ -23,9 +26,8 @@ pub(crate) fn current_handle_or_new_runtime() -> anyhow::Result<(RuntimeHandle, spacetimedb_core::runtime::current_handle_or_new_runtime() } -pub(crate) fn advance_time(_duration: Duration) { - // This is a hook, not wall-clock sleep. A future simulator layer can advance - // virtual time here while keeping targets on the same API. 
+pub(crate) fn advance_time(duration: Duration) { + time::advance(duration); } pub(crate) fn decision_source(seed: DstSeed) -> DecisionSource { diff --git a/crates/dst/src/sim/rng.rs b/crates/dst/src/sim/rng.rs index 9b19e527002..1b59d0cffe9 100644 --- a/crates/dst/src/sim/rng.rs +++ b/crates/dst/src/sim/rng.rs @@ -1,4 +1,11 @@ -use std::sync::atomic::{AtomicU64, Ordering}; +use std::{ + cell::{Cell, RefCell}, + ptr, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Mutex, OnceLock, + }, +}; use crate::seed::DstSeed; @@ -6,19 +13,31 @@ const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; #[derive(Clone, Debug)] pub struct Rng { + seed: u64, state: u64, + log: Option>, + check: Option<(Vec, usize)>, } impl Rng { pub fn new(seed: DstSeed) -> Self { + unsafe { getentropy(ptr::null_mut(), 0) }; + if !init_std_random_state(seed.0) { + tracing::warn!("failed to initialize std random state, std HashMap will not be deterministic"); + } Self { + seed: seed.0, state: splitmix64(seed.0), + log: None, + check: None, } } pub fn next_u64(&mut self) -> u64 { self.state = self.state.wrapping_add(GAMMA); - splitmix64(self.state) + let value = splitmix64(self.state); + self.record_checkpoint(value); + value } pub fn index(&mut self, len: usize) -> usize { @@ -29,8 +48,68 @@ impl Rng { pub fn sample_probability(&mut self, probability: f64) -> bool { probability_sample(self.next_u64(), probability) } + + pub(crate) fn fill_bytes(&mut self, dest: &mut [u8]) { + for chunk in dest.chunks_mut(std::mem::size_of::()) { + let bytes = self.next_u64().to_ne_bytes(); + chunk.copy_from_slice(&bytes[..chunk.len()]); + } + } + + pub(crate) fn enable_determinism_log(&mut self) { + self.log = Some(Vec::new()); + self.check = None; + } + + pub(crate) fn enable_determinism_check(&mut self, log: DeterminismLog) { + self.check = Some((log.0, 0)); + self.log = None; + } + + pub(crate) fn take_determinism_log(&mut self) -> Option { + self.log + .take() + .or_else(|| self.check.take().map(|(log, _)| log)) + 
.map(DeterminismLog) + } + + pub(crate) fn finish_determinism_check(&self) -> Result<(), String> { + if let Some((log, consumed)) = &self.check + && *consumed != log.len() + { + return Err(format!( + "non-determinism detected for seed {}: consumed {consumed} of {} checkpoints", + self.seed, + log.len() + )); + } + Ok(()) + } + + fn record_checkpoint(&mut self, value: u64) { + if self.log.is_none() && self.check.is_none() { + return; + } + + let checkpoint = checksum(value); + if let Some(log) = &mut self.log { + log.push(checkpoint); + } + if let Some((expected, consumed)) = &mut self.check { + if expected.get(*consumed) != Some(&checkpoint) { + panic!( + "non-determinism detected for seed {} at checkpoint {consumed}", + self.seed + ); + } + *consumed += 1; + } + } } +#[derive(Debug, Clone, Eq, PartialEq)] +pub(crate) struct DeterminismLog(Vec); + #[derive(Debug)] pub(crate) struct DecisionSource { state: AtomicU64, @@ -72,3 +151,209 @@ fn splitmix64(mut x: u64) -> u64 { x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); x ^ (x >> 31) } + +fn checksum(value: u64) -> u8 { + value.to_ne_bytes().into_iter().fold(0, |acc, byte| acc ^ byte) +} + +thread_local! 
{ + static CURRENT_RNG: RefCell>>> = const { RefCell::new(None) }; + static STD_RANDOM_SEED: Cell> = const { Cell::new(None) }; +} + +pub(crate) struct RngContextGuard { + previous: Option>>, +} + +pub(crate) fn enter_rng_context(rng: Arc>) -> RngContextGuard { + let previous = CURRENT_RNG.with(|current| current.replace(Some(rng))); + RngContextGuard { previous } +} + +impl Drop for RngContextGuard { + fn drop(&mut self) { + CURRENT_RNG.with(|current| { + current.replace(self.previous.take()); + }); + } +} + +fn init_std_random_state(seed: u64) -> bool { + STD_RANDOM_SEED.with(|slot| slot.set(Some(seed))); + let _ = std::collections::hash_map::RandomState::new(); + STD_RANDOM_SEED.with(|slot| slot.replace(None)).is_none() +} + +fn fill_from_seed(buf: *mut u8, buflen: usize, seed: u64) { + if buflen == 0 { + return; + } + let mut state = splitmix64(seed); + let buf = unsafe { std::slice::from_raw_parts_mut(buf, buflen) }; + for chunk in buf.chunks_mut(std::mem::size_of::()) { + state = state.wrapping_add(GAMMA); + let bytes = splitmix64(state).to_ne_bytes(); + chunk.copy_from_slice(&bytes[..chunk.len()]); + } +} + +fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { + CURRENT_RNG.with(|current| { + let Some(rng) = current.borrow().clone() else { + return false; + }; + if buflen == 0 { + return true; + } + let buf = unsafe { std::slice::from_raw_parts_mut(buf, buflen) }; + rng.lock().expect("sim rng poisoned").fill_bytes(buf); + true + }) +} + +/// Obtain random bytes through the simulation RNG when running inside the DST executor. +/// +/// This mirrors madsim's libc-level hook. It covers libc users and macOS +/// `CCRandomGenerateBytes`; crates that issue raw kernel syscalls can still +/// bypass it. 
+#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { + #[cfg(target_os = "macos")] + let _ = flags; + + if let Some(seed) = STD_RANDOM_SEED.with(|slot| slot.replace(None)) { + fill_from_seed(buf, buflen, seed); + return buflen as isize; + } + if fill_from_current_rng(buf, buflen) { + return buflen as isize; + } + + #[cfg(target_os = "linux")] + { + type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize; + static GETRANDOM: OnceLock = OnceLock::new(); + let original = GETRANDOM.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getrandom"); + std::mem::transmute(ptr) + }); + unsafe { original(buf, buflen, flags) } + } + + #[cfg(target_os = "macos")] + { + type GetentropyFn = unsafe extern "C" fn(*mut u8, usize) -> libc::c_int; + static GETENTROPY: OnceLock = OnceLock::new(); + let original = GETENTROPY.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getentropy".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getentropy"); + std::mem::transmute(ptr) + }); + match unsafe { original(buf, buflen) } { + -1 => -1, + 0 => buflen as isize, + _ => unreachable!("unexpected getentropy return value"), + } + } + + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + { + let _ = (buf, buflen, flags); + compile_error!("unsupported OS for DST getrandom override"); + } +} + +/// Fill a buffer with random bytes through the same hook used by libc. +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { + if buflen > 256 { + return -1; + } + match unsafe { getrandom(buf, buflen, 0) } { + -1 => -1, + _ => 0, + } +} + +/// macOS uses CommonCrypto for process randomness in newer Rust toolchains. 
+#[cfg(target_os = "macos")] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn CCRandomGenerateBytes(bytes: *mut u8, count: usize) -> i32 { + match unsafe { getrandom(bytes, count, 0) } { + -1 => -1, + _ => 0, + } +} + +#[cfg(test)] +mod tests { + use std::{collections::HashMap, sync::Arc}; + + use super::*; + + #[test] + fn rng_log_check_accepts_same_sequence() { + let mut first = Rng::new(DstSeed(10)); + first.enable_determinism_log(); + let first_values = (0..8).map(|_| first.next_u64()).collect::>(); + let log = first.take_determinism_log().unwrap(); + + let mut second = Rng::new(DstSeed(10)); + second.enable_determinism_check(log); + let second_values = (0..8).map(|_| second.next_u64()).collect::>(); + second.finish_determinism_check().unwrap(); + + assert_eq!(first_values, second_values); + } + + #[test] + #[should_panic(expected = "non-determinism detected")] + fn rng_log_check_rejects_different_sequence() { + let mut first = Rng::new(DstSeed(10)); + first.enable_determinism_log(); + first.next_u64(); + let log = first.take_determinism_log().unwrap(); + + let mut second = Rng::new(DstSeed(11)); + second.enable_determinism_check(log); + second.next_u64(); + } + + #[test] + fn getentropy_uses_current_sim_rng() { + let rng = Arc::new(Mutex::new(Rng::new(DstSeed(20)))); + let _guard = enter_rng_context(Arc::clone(&rng)); + + let mut actual = [0u8; 24]; + unsafe { + assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); + } + + let mut expected_rng = Rng::new(DstSeed(20)); + let mut expected = [0u8; 24]; + expected_rng.fill_bytes(&mut expected); + assert_eq!(actual, expected); + } + + #[test] + fn std_hashmap_order_is_seeded_for_runtime_thread() { + fn order_for(seed: DstSeed) -> Vec<(u64, u64)> { + std::thread::spawn(move || { + let _rng = Rng::new(seed); + (0..12) + .map(|idx| (idx, idx)) + .collect::>() + .into_iter() + .collect() + }) + .join() + .unwrap() + } + + assert_eq!(order_for(DstSeed(30)), order_for(DstSeed(30))); + } +} diff --git 
a/crates/dst/src/sim/system_thread.rs b/crates/dst/src/sim/system_thread.rs new file mode 100644 index 00000000000..9bb3e612d7b --- /dev/null +++ b/crates/dst/src/sim/system_thread.rs @@ -0,0 +1,64 @@ +//! Guard against creating OS threads from inside the simulator. + +use std::{cell::Cell, sync::OnceLock}; + +thread_local! { + static IN_SIMULATION: Cell = const { Cell::new(false) }; +} + +pub(crate) struct SimulationThreadGuard { + previous: bool, +} + +pub(crate) fn enter_simulation_thread() -> SimulationThreadGuard { + let previous = IN_SIMULATION.with(|state| state.replace(true)); + SimulationThreadGuard { previous } +} + +impl Drop for SimulationThreadGuard { + fn drop(&mut self) { + IN_SIMULATION.with(|state| { + state.set(self.previous); + }); + } +} + +fn in_simulation() -> bool { + IN_SIMULATION.with(Cell::get) +} + +/// Forbid creating system threads in simulation. +#[cfg(unix)] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int { + if in_simulation() { + eprintln!("attempt to spawn a system thread in simulation."); + eprintln!("note: use simulator tasks instead."); + return -1; + } + + type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int; + static PTHREAD_ATTR_INIT: OnceLock = OnceLock::new(); + let original = PTHREAD_ATTR_INIT.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original pthread_attr_init"); + std::mem::transmute(ptr) + }); + unsafe { original(attr) } +} + +#[cfg(test)] +mod tests { + use crate::{seed::DstSeed, sim}; + + #[test] + #[cfg(unix)] + fn runtime_forbids_system_thread_spawn() { + let mut runtime = sim::Runtime::new(DstSeed(200)).unwrap(); + runtime.block_on(async { + let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); + assert!(result.is_err()); + }); + } +} diff --git 
a/crates/dst/src/sim/time.rs b/crates/dst/src/sim/time.rs new file mode 100644 index 00000000000..6a00836637b --- /dev/null +++ b/crates/dst/src/sim/time.rs @@ -0,0 +1,272 @@ +//! Virtual time for the local DST simulator. + +use std::{ + cell::RefCell, + collections::BTreeMap, + future::Future, + pin::Pin, + sync::{Arc, Mutex}, + task::{Context, Poll, Waker}, + time::Duration, +}; + +#[derive(Clone, Debug)] +pub struct TimeHandle { + inner: Arc>, +} + +impl TimeHandle { + pub(crate) fn new() -> Self { + Self { + inner: Arc::new(Mutex::new(TimeState::default())), + } + } + + pub fn now(&self) -> Duration { + self.inner.lock().expect("sim time poisoned").now + } + + pub fn advance(&self, duration: Duration) { + if duration.is_zero() { + return; + } + + let wakers = { + let mut state = self.inner.lock().expect("sim time poisoned"); + state.now = state.now.saturating_add(duration); + state.take_due_wakers() + }; + wake_all(wakers); + } + + pub(crate) fn wake_next_timer(&self) -> bool { + let wakers = { + let mut state = self.inner.lock().expect("sim time poisoned"); + let Some(next_deadline) = state.timers.values().map(|timer| timer.deadline).min() else { + return false; + }; + if next_deadline > state.now { + state.now = next_deadline; + } + state.take_due_wakers() + }; + let woke = !wakers.is_empty(); + wake_all(wakers); + woke + } + + fn register_timer(&self, id: TimerId, deadline: Duration, waker: &Waker) { + let mut state = self.inner.lock().expect("sim time poisoned"); + state.timers.insert( + id, + TimerEntry { + deadline, + waker: waker.clone(), + }, + ); + } + + fn cancel_timer(&self, id: TimerId) { + self.inner.lock().expect("sim time poisoned").timers.remove(&id); + } + + fn next_timer_id(&self) -> TimerId { + let mut state = self.inner.lock().expect("sim time poisoned"); + let id = TimerId(state.next_timer_id); + state.next_timer_id = state.next_timer_id.saturating_add(1); + id + } +} + +#[derive(Debug, Default)] +struct TimeState { + now: Duration, + 
next_timer_id: u64, + timers: BTreeMap, +} + +impl TimeState { + fn take_due_wakers(&mut self) -> Vec { + let due = self + .timers + .iter() + .filter_map(|(id, timer)| (timer.deadline <= self.now).then_some(*id)) + .collect::>(); + due.into_iter() + .filter_map(|id| self.timers.remove(&id).map(|timer| timer.waker)) + .collect() + } +} + +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +struct TimerId(u64); + +#[derive(Debug)] +struct TimerEntry { + deadline: Duration, + waker: Waker, +} + +thread_local! { + static CURRENT_TIME: RefCell> = const { RefCell::new(None) }; +} + +pub(crate) struct TimeContextGuard { + previous: Option, +} + +pub(crate) fn enter_time_context(handle: TimeHandle) -> TimeContextGuard { + let previous = CURRENT_TIME.with(|current| current.replace(Some(handle))); + TimeContextGuard { previous } +} + +pub(crate) fn try_current_handle() -> Option { + CURRENT_TIME.with(|current| current.borrow().clone()) +} + +pub fn now() -> Duration { + try_current_handle().map(|handle| handle.now()).unwrap_or_default() +} + +pub fn advance(duration: Duration) { + if let Some(handle) = try_current_handle() { + handle.advance(duration); + } +} + +pub fn sleep(duration: Duration) -> Sleep { + Sleep { + duration, + state: SleepState::Unregistered, + } +} + +impl Drop for TimeContextGuard { + fn drop(&mut self) { + CURRENT_TIME.with(|current| { + current.replace(self.previous.take()); + }); + } +} + +pub struct Sleep { + duration: Duration, + state: SleepState, +} + +enum SleepState { + Unregistered, + Registered { + handle: TimeHandle, + id: TimerId, + deadline: Duration, + }, + Done, +} + +impl Future for Sleep { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if matches!(self.state, SleepState::Done) { + return Poll::Ready(()); + } + + if matches!(self.state, SleepState::Unregistered) { + let handle = try_current_handle().expect("sim::time::sleep polled outside sim runtime"); + let deadline = 
handle.now().saturating_add(self.duration); + let id = handle.next_timer_id(); + self.state = SleepState::Registered { handle, id, deadline }; + } + + let SleepState::Registered { handle, id, deadline } = &self.state else { + unreachable!("sleep state should be registered or done"); + }; + + if handle.now() >= *deadline { + let handle = handle.clone(); + let id = *id; + handle.cancel_timer(id); + self.state = SleepState::Done; + Poll::Ready(()) + } else { + handle.register_timer(*id, *deadline, cx.waker()); + Poll::Pending + } + } +} + +impl Drop for Sleep { + fn drop(&mut self) { + if let SleepState::Registered { handle, id, .. } = &self.state { + handle.cancel_timer(*id); + } + } +} + +fn wake_all(wakers: Vec) { + for waker in wakers { + waker.wake(); + } +} + +#[cfg(test)] +mod tests { + use std::{ + sync::{Arc, Mutex}, + time::Duration, + }; + + use crate::{seed::DstSeed, sim}; + + #[test] + fn sleep_fast_forwards_virtual_time() { + let mut runtime = sim::Runtime::new(DstSeed(101)).unwrap(); + + runtime.block_on(async { + assert_eq!(super::now(), Duration::ZERO); + super::sleep(Duration::from_millis(5)).await; + assert_eq!(super::now(), Duration::from_millis(5)); + }); + } + + #[test] + fn shorter_timer_wakes_first() { + let mut runtime = sim::Runtime::new(DstSeed(102)).unwrap(); + let handle = runtime.handle(); + let order = Arc::new(Mutex::new(Vec::new())); + + runtime.block_on({ + let order = Arc::clone(&order); + async move { + let slow_order = Arc::clone(&order); + let slow = handle.spawn_on(sim::NodeId::MAIN, async move { + super::sleep(Duration::from_millis(10)).await; + slow_order.lock().expect("order poisoned").push(10); + }); + + let fast_order = Arc::clone(&order); + let fast = handle.spawn_on(sim::NodeId::MAIN, async move { + super::sleep(Duration::from_millis(3)).await; + fast_order.lock().expect("order poisoned").push(3); + }); + + fast.await; + slow.await; + } + }); + + assert_eq!(*order.lock().expect("order poisoned"), vec![3, 10]); + 
assert_eq!(runtime.elapsed(), Duration::from_millis(10)); + } + + #[test] + fn explicit_advance_moves_virtual_time() { + let mut runtime = sim::Runtime::new(DstSeed(103)).unwrap(); + + runtime.block_on(async { + super::advance(Duration::from_millis(7)); + assert_eq!(super::now(), Duration::from_millis(7)); + }); + } +} diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index 52a941ec8f1..51a483d73a2 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -1,5 +1,4 @@ //! Concrete simulation targets. -pub(crate) mod buggified_repo; pub mod descriptor; pub mod relational_db_commitlog; diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 1bf0e4d2fb4..020f0b02732 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -37,10 +37,12 @@ use crate::{ }, schema::{SchemaPlan, SimRow}, seed::DstSeed, - sim, - targets::buggified_repo::{is_injected_disk_error_text, BuggifiedRepo, CommitlogFaultConfig}, + sim::{ + self, + commitlog::{is_injected_disk_error_text, CommitlogFaultConfig, CommitlogFaultSummary, FaultableRepo}, + }, workload::{ - commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary}, + commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DiskFaultSummary, DurableReplaySummary}, commitlog_ops::{InteractionSummary, RuntimeSummary, SchemaSummary, TableOperationSummary, TransactionSummary}, table_ops::{ ConnectionWriteState, TableErrorKind, TableInteractionCase, TableOperation, TableScenario, TableScenarioId, @@ -1264,7 +1266,7 @@ impl RelationalDbEngine { table_ops: self.stats.table_ops.clone(), transactions: self.stats.transaction_summary(durable_commit_count), runtime: self.stats.runtime_summary(), - disk_faults: self.commitlog_repo.fault_summary(), + disk_faults: disk_fault_summary(self.commitlog_repo.fault_summary()), replay, table, }) @@ 
-1382,7 +1384,7 @@ impl TargetEngine for RelationalDbEngine { } } -type StressCommitlogRepo = BuggifiedRepo; +type StressCommitlogRepo = FaultableRepo; type InMemoryCommitlogDurability = Local; struct RelationalDbBootstrap { @@ -1401,7 +1403,7 @@ fn bootstrap_relational_db( let (runtime_handle, runtime_guard) = sim::current_handle_or_new_runtime()?; let fault_config = CommitlogFaultConfig::for_profile(fault_profile); - let commitlog_repo = BuggifiedRepo::new(MemoryCommitlogRepo::new(8 * 1024 * 1024), fault_config, seed.fork(702)); + let commitlog_repo = FaultableRepo::new(MemoryCommitlogRepo::new(8 * 1024 * 1024), fault_config, seed.fork(702)); let durability_opts = commitlog_stress_options(seed.fork(701)); let durability = Arc::new( InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), runtime_handle.clone(), durability_opts) @@ -1473,6 +1475,21 @@ fn schema_summary(schema: &SchemaPlan) -> SchemaSummary { } } +fn disk_fault_summary(summary: CommitlogFaultSummary) -> DiskFaultSummary { + DiskFaultSummary { + profile: summary.profile, + latency: summary.latency, + short_read: summary.short_read, + short_write: summary.short_write, + read_error: summary.read_error, + write_error: summary.write_error, + flush_error: summary.flush_error, + fsync_error: summary.fsync_error, + open_error: summary.open_error, + metadata_error: summary.metadata_error, + } +} + fn in_memory_size_on_disk() -> io::Result { Ok(SizeOnDisk::default()) } From 051f8ac8d2b58e179d7ff3de425a0d3637066e96 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 5 May 2026 22:12:24 +0530 Subject: [PATCH 29/74] runtime crate --- Cargo.lock | 25 +- Cargo.toml | 4 +- crates/core/Cargo.toml | 2 +- crates/core/src/auth/token_validation.rs | 2 +- crates/core/src/database_logger.rs | 11 +- crates/core/src/db/durability.rs | 14 +- crates/core/src/db/persistence.rs | 19 +- crates/core/src/db/relational_db.rs | 7 +- crates/core/src/host/disk_storage.rs | 4 +- crates/core/src/host/instance_env.rs | 2 
+- crates/core/src/runtime.rs | 14 +- .../subscription/module_subscription_actor.rs | 2 +- crates/dst/Cargo.toml | 5 +- crates/dst/src/sim/commitlog.rs | 38 +- crates/dst/src/sim/mod.rs | 73 +- crates/dst/src/sim/time.rs | 242 +----- .../src/targets/relational_db_commitlog.rs | 48 +- .../src/workload/commitlog_ops/generation.rs | 86 ++- .../dst/src/workload/table_ops/generation.rs | 4 + crates/dst/src/workload/table_ops/mod.rs | 2 + crates/dst/src/workload/table_ops/model.rs | 98 ++- .../workload/table_ops/scenarios/banking.rs | 13 + crates/durability/src/imp/mod.rs | 128 ++- crates/io/Cargo.toml | 13 - crates/io/build.rs | 10 - crates/io/src/lib.rs | 73 -- crates/runtime/Cargo.toml | 24 + crates/runtime/LICENSE | 731 ++++++++++++++++++ crates/runtime/src/lib.rs | 100 +++ crates/{dst => runtime}/src/sim/executor.rs | 113 ++- crates/runtime/src/sim/mod.rs | 23 + crates/{dst => runtime}/src/sim/rng.rs | 46 +- .../{dst => runtime}/src/sim/system_thread.rs | 4 +- crates/runtime/src/sim/time.rs | 343 ++++++++ tools/ci/src/main.rs | 59 +- 35 files changed, 1918 insertions(+), 464 deletions(-) delete mode 100644 crates/io/Cargo.toml delete mode 100644 crates/io/build.rs delete mode 100644 crates/io/src/lib.rs create mode 100644 crates/runtime/Cargo.toml create mode 100644 crates/runtime/LICENSE create mode 100644 crates/runtime/src/lib.rs rename crates/{dst => runtime}/src/sim/executor.rs (80%) create mode 100644 crates/runtime/src/sim/mod.rs rename crates/{dst => runtime}/src/sim/rng.rs (91%) rename crates/{dst => runtime}/src/sim/system_thread.rs (94%) create mode 100644 crates/runtime/src/sim/time.rs diff --git a/Cargo.lock b/Cargo.lock index c973141026c..cf40c3e9845 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8211,7 +8211,6 @@ dependencies = [ "spacetimedb-execution", "spacetimedb-expr", "spacetimedb-fs-utils", - "spacetimedb-io", "spacetimedb-jsonwebtoken", "spacetimedb-jwks", "spacetimedb-lib 2.2.0", @@ -8221,6 +8220,7 @@ dependencies = [ 
"spacetimedb-physical-plan", "spacetimedb-primitives 2.2.0", "spacetimedb-query", + "spacetimedb-runtime", "spacetimedb-sats 2.2.0", "spacetimedb-schema", "spacetimedb-snapshot", @@ -8310,16 +8310,15 @@ name = "spacetimedb-dst" version = "2.2.0" dependencies = [ "anyhow", - "async-task", "clap 4.5.50", "futures-util", - "libc", "spacetimedb-commitlog", "spacetimedb-core", "spacetimedb-datastore", "spacetimedb-durability", "spacetimedb-lib 2.2.0", "spacetimedb-primitives 2.2.0", + "spacetimedb-runtime", "spacetimedb-sats 2.2.0", "spacetimedb-schema", "spacetimedb-table", @@ -8407,13 +8406,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "spacetimedb-io" -version = "2.2.0" -dependencies = [ - "tokio", -] - [[package]] name = "spacetimedb-jsonwebtoken" version = "9.3.0" @@ -8615,6 +8607,19 @@ dependencies = [ "spacetimedb-lib 2.2.0", ] +[[package]] +name = "spacetimedb-runtime" +version = "2.2.0" +dependencies = [ + "anyhow", + "async-task", + "futures", + "futures-util", + "libc", + "tokio", + "tracing", +] + [[package]] name = "spacetimedb-sats" version = "1.9.0" diff --git a/Cargo.toml b/Cargo.toml index baa1e9f3b8b..d1b1efdaa4e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ members = [ "crates/physical-plan", "crates/primitives", "crates/query", - "crates/io", + "crates/runtime", "crates/sats", "crates/schema", "crates/smoketests", @@ -132,7 +132,6 @@ spacetimedb-durability = { path = "crates/durability", version = "=2.2.0" } spacetimedb-execution = { path = "crates/execution", version = "=2.2.0" } spacetimedb-expr = { path = "crates/expr", version = "=2.2.0" } spacetimedb-guard = { path = "crates/guard", version = "=2.2.0" } -spacetimedb-io = { path = "crates/io", version = "=2.2.0" } spacetimedb-lib = { path = "crates/lib", default-features = false, version = "=2.2.0" } spacetimedb-memory-usage = { path = "crates/memory-usage", version = "=2.2.0", default-features = false } spacetimedb-metrics = { path = "crates/metrics", version = "=2.2.0" } 
@@ -141,6 +140,7 @@ spacetimedb-pg = { path = "crates/pg", version = "=2.2.0" } spacetimedb-physical-plan = { path = "crates/physical-plan", version = "=2.2.0" } spacetimedb-primitives = { path = "crates/primitives", version = "=2.2.0" } spacetimedb-query = { path = "crates/query", version = "=2.2.0" } +spacetimedb-runtime = { path = "crates/runtime", version = "=2.2.0", default-features = false } spacetimedb-sats = { path = "crates/sats", version = "=2.2.0" } spacetimedb-schema = { path = "crates/schema", version = "=2.2.0" } spacetimedb-standalone = { path = "crates/standalone", version = "=2.2.0" } diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index ed8c2ac9e9d..fee3cfd68b9 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -28,7 +28,7 @@ spacetimedb-primitives.workspace = true spacetimedb-paths.workspace = true spacetimedb-physical-plan.workspace = true spacetimedb-query.workspace = true -spacetimedb-io.workspace = true +spacetimedb-runtime = { workspace = true, features = ["tokio"] } spacetimedb-sats = { workspace = true, features = ["serde"] } spacetimedb-schema.workspace = true spacetimedb-table.workspace = true diff --git a/crates/core/src/auth/token_validation.rs b/crates/core/src/auth/token_validation.rs index c644de5af61..c38d732882d 100644 --- a/crates/core/src/auth/token_validation.rs +++ b/crates/core/src/auth/token_validation.rs @@ -481,7 +481,7 @@ mod tests { use axum::routing::get; use axum::Json; use axum::Router; - use spacetimedb_io::net::TcpListener; + use tokio::net::TcpListener; use tokio::sync::oneshot; use serde::{Deserialize, Serialize}; diff --git a/crates/core/src/database_logger.rs b/crates/core/src/database_logger.rs index 4804cea8093..f194cb60a48 100644 --- a/crates/core/src/database_logger.rs +++ b/crates/core/src/database_logger.rs @@ -3,8 +3,6 @@ use chrono::{NaiveDate, Utc}; use futures::stream::{self, BoxStream}; use futures::{Stream, StreamExt as _, TryStreamExt}; use 
pin_project_lite::pin_project; -use spacetimedb_io::fs::FileFromStd; -use spacetimedb_io::io::{AsyncRead, BufReader, ReadBuf}; use std::collections::VecDeque; use std::fs::File; use std::future; @@ -13,6 +11,7 @@ use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; +use tokio::io::{AsyncRead, BufReader, ReadBuf}; use tokio::sync::{broadcast, mpsc, oneshot}; use tokio_stream::wrappers::errors::BroadcastStreamRecvError; use tokio_stream::wrappers::BroadcastStream; @@ -108,7 +107,7 @@ impl Logger for FileLogger { seek_to(&mut file, &mut buf, n)?; } - Ok::<_, io::Error>(spacetimedb_io::fs::file_from_std(file)) + Ok::<_, io::Error>(tokio::fs::File::from_std(file)) } })) .map_ok(ReaderStream::new) @@ -593,7 +592,7 @@ fn seek_to(file: &mut File, buf: &mut [u8], num_lines: u32) -> io::Result<()> { Ok(()) } -fn read_exact_at(file: &std::fs::File, buf: &mut [u8], offset: u64) -> io::Result<()> { +fn read_exact_at(file: &File, buf: &mut [u8], offset: u64) -> io::Result<()> { #[cfg(unix)] { use std::os::unix::fs::FileExt; @@ -627,14 +626,14 @@ fn into_file_stream(file: impl Into>) -> impl Stream) -> Self { - match file.map(spacetimedb_io::fs::file_from_std) { + match file.map(tokio::fs::File::from_std) { Some(inner) => Self::File { inner }, None => Self::Empty, } diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index 857d9828d4e..3a466d53eb6 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -9,9 +9,8 @@ use spacetimedb_datastore::{execution_context::ReducerContext, traits::TxData}; use spacetimedb_durability::Transaction; use spacetimedb_lib::Identity; use spacetimedb_sats::ProductValue; -use tokio::{runtime, time::timeout}; -use crate::db::persistence::Durability; +use crate::{db::persistence::Durability, runtime::RuntimeDispatch}; pub(super) fn request_durability( durability: &Durability, @@ -32,12 +31,12 @@ pub(super) fn request_durability( })); } -pub(super) fn 
spawn_close(durability: Arc, runtime: &runtime::Handle, database_identity: Identity) { - let rt = runtime.clone(); - rt.spawn(async move { +pub(super) fn spawn_close(durability: Arc, runtime: &RuntimeDispatch, database_identity: Identity) { + let label = format!("[{database_identity}]"); + let runtime = runtime.clone(); + runtime.clone().spawn(async move { log::info!("starting spawn close"); - let label = format!("[{database_identity}]"); - match timeout(Duration::from_secs(10), durability.close()).await { + match runtime.timeout(Duration::from_secs(10), durability.close()).await { Err(_elapsed) => { error!("{label} timeout waiting for durability shutdown"); } @@ -45,7 +44,6 @@ pub(super) fn spawn_close(durability: Arc, runtime: &runtime::Handle info!("{label} durability shut down at tx offset: {offset:?}"); } } - log::info!("closing spawn close"); }); } diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index e837506da38..7eaabe64a7b 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -6,7 +6,7 @@ use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; use spacetimedb_snapshot::SnapshotRepository; -use crate::{messages::control_db::Database, util::asyncify}; +use crate::{messages::control_db::Database, runtime::RuntimeDispatch, util::asyncify}; use super::{ relational_db::{self, Txdata}, @@ -41,8 +41,8 @@ pub struct Persistence { /// persistent (as opposed to in-memory) databases. This is enforced by /// this type. pub snapshots: Option, - /// The tokio runtime onto which durability-related tasks shall be spawned. - pub runtime: tokio::runtime::Handle, + /// Runtime onto which durability-related tasks shall be spawned. 
+ pub runtime: RuntimeDispatch, } impl Persistence { @@ -52,6 +52,15 @@ impl Persistence { disk_size: impl Fn() -> io::Result + Send + Sync + 'static, snapshots: Option, runtime: tokio::runtime::Handle, + ) -> Self { + Self::new_with_runtime(durability, disk_size, snapshots, RuntimeDispatch::tokio(runtime)) + } + + pub fn new_with_runtime( + durability: impl spacetimedb_durability::Durability + 'static, + disk_size: impl Fn() -> io::Result + Send + Sync + 'static, + snapshots: Option, + runtime: RuntimeDispatch, ) -> Self { Self { durability: Arc::new(durability), @@ -91,7 +100,7 @@ impl Persistence { Option>, Option, Option, - Option, + Option, ) { this.map( |Self { @@ -162,7 +171,7 @@ impl PersistenceProvider for LocalPersistenceProvider { durability, disk_size, snapshots: Some(snapshot_worker), - runtime: tokio::runtime::Handle::current(), + runtime: RuntimeDispatch::tokio_current(), }) } } diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 0bb1acfbce1..04e4d56cb0b 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -1,6 +1,7 @@ use crate::db::durability::{request_durability, spawn_close as spawn_durability_close}; use crate::db::MetricsRecorderQueue; use crate::error::{DBError, RestoreSnapshotError}; +use crate::runtime::RuntimeDispatch; use crate::subscription::ExecutionCounters; use crate::util::asyncify; use crate::worker_metrics::WORKER_METRICS; @@ -99,7 +100,7 @@ pub struct RelationalDB { inner: Locking, durability: Option>, - durability_runtime: Option, + durability_runtime: Option, snapshot_worker: Option, row_count_fn: RowCountFn, @@ -1939,7 +1940,7 @@ pub mod tests_utils { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: rt, + runtime: RuntimeDispatch::tokio(rt), }; let (db, _) = RelationalDB::open( @@ -2060,7 +2061,7 @@ pub mod tests_utils { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: rt, + runtime: 
RuntimeDispatch::tokio(rt), }; let db = Self::open_db(history, Some(persistence), None, 0)?; diff --git a/crates/core/src/host/disk_storage.rs b/crates/core/src/host/disk_storage.rs index 7662f50db38..3c55472aa16 100644 --- a/crates/core/src/host/disk_storage.rs +++ b/crates/core/src/host/disk_storage.rs @@ -1,9 +1,9 @@ use async_trait::async_trait; -use spacetimedb_io::fs; -use spacetimedb_io::io::AsyncWriteExt; use spacetimedb_lib::{hash_bytes, Hash}; use std::io; use std::path::PathBuf; +use tokio::fs; +use tokio::io::AsyncWriteExt; use super::ExternalStorage; diff --git a/crates/core/src/host/instance_env.rs b/crates/core/src/host/instance_env.rs index 4c84a775a5d..0d3d41632b1 100644 --- a/crates/core/src/host/instance_env.rs +++ b/crates/core/src/host/instance_env.rs @@ -1019,7 +1019,7 @@ impl reqwest::dns::Resolve for FilteredDnsResolver { fn resolve(&self, name: reqwest::dns::Name) -> reqwest::dns::Resolving { let host = name.as_str().to_owned(); Box::pin(async move { - let addrs = spacetimedb_io::net::lookup_host((host.as_str(), 0)).await?; + let addrs = tokio::net::lookup_host((host.as_str(), 0)).await?; let filtered_addrs: Vec = addrs.filter(|addr| !is_blocked_ip(addr.ip())).collect(); if filtered_addrs.is_empty() { diff --git a/crates/core/src/runtime.rs b/crates/core/src/runtime.rs index 0d16e2d3566..53baad4d73a 100644 --- a/crates/core/src/runtime.rs +++ b/crates/core/src/runtime.rs @@ -1,13 +1,3 @@ -//! Opaque runtime boundary for crates that should not depend on Tokio directly. +//! Runtime boundary re-exported for core call sites. 
-pub type Handle = tokio::runtime::Handle; -pub type Runtime = tokio::runtime::Runtime; - -pub fn current_handle_or_new_runtime() -> anyhow::Result<(Handle, Option)> { - if let Ok(handle) = Handle::try_current() { - return Ok((handle, None)); - } - - let runtime = Runtime::new()?; - Ok((runtime.handle().clone(), Some(runtime))) -} +pub use spacetimedb_runtime::{current_handle_or_new_runtime, Handle, Runtime, RuntimeDispatch, RuntimeTimeout}; diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index c7cb2337bcf..14c28f32f46 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2046,7 +2046,7 @@ mod tests { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), snapshots: None, - runtime: rt, + runtime: crate::runtime::RuntimeDispatch::tokio(rt), }), None, 0, diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index d425f3d977f..5814aac7e2e 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -16,16 +16,15 @@ bench = false [dependencies] anyhow.workspace = true -async-task = "4.4" clap.workspace = true futures-util.workspace = true -libc = "0.2" spacetimedb-datastore = { workspace = true, features = ["test"] } spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.2.0" } spacetimedb-commitlog = { workspace = true, features = ["test"] } -spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.2.0" } +spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.2.0", features = ["test"] } spacetimedb-lib.workspace = true spacetimedb-primitives.workspace = true +spacetimedb-runtime = { workspace = true, features = ["simulation"] } spacetimedb-sats.workspace = true spacetimedb-schema = { workspace = true, features = ["test"] } spacetimedb-table.workspace = 
true diff --git a/crates/dst/src/sim/commitlog.rs b/crates/dst/src/sim/commitlog.rs index 07d8b245d39..0cefde7ede9 100644 --- a/crates/dst/src/sim/commitlog.rs +++ b/crates/dst/src/sim/commitlog.rs @@ -324,7 +324,9 @@ impl BufRead for FaultableReader { fn fill_buf(&mut self) -> io::Result<&[u8]> { self.faults.maybe_disk_latency(); self.faults.maybe_error(FaultKind::Read)?; - self.inner.fill_buf() + let buf = self.inner.fill_buf()?; + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); + Ok(&buf[..len]) } fn consume(&mut self, amount: usize) { @@ -534,3 +536,37 @@ impl FaultKind { } } } + +#[cfg(test)] +mod tests { + use std::io::{BufRead, Cursor}; + + use super::*; + + fn always_short_read_config() -> CommitlogFaultConfig { + CommitlogFaultConfig { + profile: CommitlogFaultProfile::Default, + enabled: true, + latency_prob: 0.0, + long_latency_prob: 0.0, + short_io_prob: 1.0, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + } + } + + #[test] + fn buf_read_path_applies_short_read_faults() { + let faults = FaultController::new(always_short_read_config(), DstSeed(55)); + faults.enable(); + let mut reader = FaultableReader::new(Cursor::new(vec![1, 2, 3, 4]), faults.clone()); + + assert_eq!(reader.fill_buf().unwrap(), &[1, 2]); + assert_eq!(faults.summary().short_read, 1); + } +} diff --git a/crates/dst/src/sim/mod.rs b/crates/dst/src/sim/mod.rs index c64232f8387..fce9b4bd663 100644 --- a/crates/dst/src/sim/mod.rs +++ b/crates/dst/src/sim/mod.rs @@ -5,25 +5,74 @@ //! deterministic RNG instead of being driven by a package-level async runtime. 
pub(crate) mod commitlog; -mod executor; -mod rng; -mod system_thread; pub mod time; -use std::time::Duration; +use std::{future::Future, time::Duration}; -pub use executor::{yield_now, Handle, JoinHandle, NodeId, Runtime}; -pub use rng::Rng; +pub use spacetimedb_runtime::sim::{yield_now, DecisionSource, Handle, JoinHandle, NodeId, Rng}; use crate::seed::DstSeed; -pub(crate) use rng::DecisionSource; +/// DST-facing wrapper that keeps the top-level seed type local to this crate. +pub struct Runtime { + inner: spacetimedb_runtime::sim::Runtime, +} + +impl Runtime { + pub fn new(seed: DstSeed) -> anyhow::Result { + Ok(Self { + inner: spacetimedb_runtime::sim::Runtime::new(seed.0)?, + }) + } + + pub fn block_on(&mut self, future: F) -> F::Output { + self.inner.block_on(future) + } + + pub fn elapsed(&self) -> Duration { + self.inner.elapsed() + } + + pub fn handle(&self) -> Handle { + self.inner.handle() + } + + pub fn create_node(&self) -> NodeId { + self.inner.create_node() + } + + pub fn pause(&self, node: NodeId) { + self.inner.pause(node); + } + + pub fn resume(&self, node: NodeId) { + self.inner.resume(node); + } + + pub fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.inner.spawn_on(node, future) + } -pub(crate) type RuntimeHandle = spacetimedb_core::runtime::Handle; -pub(crate) type RuntimeGuard = spacetimedb_core::runtime::Runtime; + pub fn check_determinism(seed: DstSeed, make_future: fn() -> F) -> F::Output + where + F: Future + 'static, + F::Output: Send + 'static, + { + spacetimedb_runtime::sim::Runtime::check_determinism(seed.0, make_future) + } -pub(crate) fn current_handle_or_new_runtime() -> anyhow::Result<(RuntimeHandle, Option)> { - spacetimedb_core::runtime::current_handle_or_new_runtime() + pub fn check_determinism_with(seed: DstSeed, make_future: M) -> F::Output + where + M: Fn() -> F + Clone + Send + 'static, + F: Future + 'static, + F::Output: Send + 'static, 
+ { + spacetimedb_runtime::sim::Runtime::check_determinism_with(seed.0, make_future) + } } pub(crate) fn advance_time(duration: Duration) { @@ -31,5 +80,5 @@ pub(crate) fn advance_time(duration: Duration) { } pub(crate) fn decision_source(seed: DstSeed) -> DecisionSource { - DecisionSource::new(seed) + DecisionSource::new(seed.0) } diff --git a/crates/dst/src/sim/time.rs b/crates/dst/src/sim/time.rs index 6a00836637b..59b09d26bba 100644 --- a/crates/dst/src/sim/time.rs +++ b/crates/dst/src/sim/time.rs @@ -1,215 +1,9 @@ //! Virtual time for the local DST simulator. -use std::{ - cell::RefCell, - collections::BTreeMap, - future::Future, - pin::Pin, - sync::{Arc, Mutex}, - task::{Context, Poll, Waker}, - time::Duration, +pub use spacetimedb_runtime::sim::time::{ + advance, now, sleep, timeout, try_current_handle, TimeHandle, TimeoutElapsed, }; -#[derive(Clone, Debug)] -pub struct TimeHandle { - inner: Arc>, -} - -impl TimeHandle { - pub(crate) fn new() -> Self { - Self { - inner: Arc::new(Mutex::new(TimeState::default())), - } - } - - pub fn now(&self) -> Duration { - self.inner.lock().expect("sim time poisoned").now - } - - pub fn advance(&self, duration: Duration) { - if duration.is_zero() { - return; - } - - let wakers = { - let mut state = self.inner.lock().expect("sim time poisoned"); - state.now = state.now.saturating_add(duration); - state.take_due_wakers() - }; - wake_all(wakers); - } - - pub(crate) fn wake_next_timer(&self) -> bool { - let wakers = { - let mut state = self.inner.lock().expect("sim time poisoned"); - let Some(next_deadline) = state.timers.values().map(|timer| timer.deadline).min() else { - return false; - }; - if next_deadline > state.now { - state.now = next_deadline; - } - state.take_due_wakers() - }; - let woke = !wakers.is_empty(); - wake_all(wakers); - woke - } - - fn register_timer(&self, id: TimerId, deadline: Duration, waker: &Waker) { - let mut state = self.inner.lock().expect("sim time poisoned"); - state.timers.insert( - id, - 
TimerEntry { - deadline, - waker: waker.clone(), - }, - ); - } - - fn cancel_timer(&self, id: TimerId) { - self.inner.lock().expect("sim time poisoned").timers.remove(&id); - } - - fn next_timer_id(&self) -> TimerId { - let mut state = self.inner.lock().expect("sim time poisoned"); - let id = TimerId(state.next_timer_id); - state.next_timer_id = state.next_timer_id.saturating_add(1); - id - } -} - -#[derive(Debug, Default)] -struct TimeState { - now: Duration, - next_timer_id: u64, - timers: BTreeMap, -} - -impl TimeState { - fn take_due_wakers(&mut self) -> Vec { - let due = self - .timers - .iter() - .filter_map(|(id, timer)| (timer.deadline <= self.now).then_some(*id)) - .collect::>(); - due.into_iter() - .filter_map(|id| self.timers.remove(&id).map(|timer| timer.waker)) - .collect() - } -} - -#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] -struct TimerId(u64); - -#[derive(Debug)] -struct TimerEntry { - deadline: Duration, - waker: Waker, -} - -thread_local! { - static CURRENT_TIME: RefCell> = const { RefCell::new(None) }; -} - -pub(crate) struct TimeContextGuard { - previous: Option, -} - -pub(crate) fn enter_time_context(handle: TimeHandle) -> TimeContextGuard { - let previous = CURRENT_TIME.with(|current| current.replace(Some(handle))); - TimeContextGuard { previous } -} - -pub(crate) fn try_current_handle() -> Option { - CURRENT_TIME.with(|current| current.borrow().clone()) -} - -pub fn now() -> Duration { - try_current_handle().map(|handle| handle.now()).unwrap_or_default() -} - -pub fn advance(duration: Duration) { - if let Some(handle) = try_current_handle() { - handle.advance(duration); - } -} - -pub fn sleep(duration: Duration) -> Sleep { - Sleep { - duration, - state: SleepState::Unregistered, - } -} - -impl Drop for TimeContextGuard { - fn drop(&mut self) { - CURRENT_TIME.with(|current| { - current.replace(self.previous.take()); - }); - } -} - -pub struct Sleep { - duration: Duration, - state: SleepState, -} - -enum SleepState { - 
Unregistered, - Registered { - handle: TimeHandle, - id: TimerId, - deadline: Duration, - }, - Done, -} - -impl Future for Sleep { - type Output = (); - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - if matches!(self.state, SleepState::Done) { - return Poll::Ready(()); - } - - if matches!(self.state, SleepState::Unregistered) { - let handle = try_current_handle().expect("sim::time::sleep polled outside sim runtime"); - let deadline = handle.now().saturating_add(self.duration); - let id = handle.next_timer_id(); - self.state = SleepState::Registered { handle, id, deadline }; - } - - let SleepState::Registered { handle, id, deadline } = &self.state else { - unreachable!("sleep state should be registered or done"); - }; - - if handle.now() >= *deadline { - let handle = handle.clone(); - let id = *id; - handle.cancel_timer(id); - self.state = SleepState::Done; - Poll::Ready(()) - } else { - handle.register_timer(*id, *deadline, cx.waker()); - Poll::Pending - } - } -} - -impl Drop for Sleep { - fn drop(&mut self) { - if let SleepState::Registered { handle, id, .. 
} = &self.state { - handle.cancel_timer(*id); - } - } -} - -fn wake_all(wakers: Vec) { - for waker in wakers { - waker.wake(); - } -} - #[cfg(test)] mod tests { use std::{ @@ -269,4 +63,36 @@ mod tests { assert_eq!(super::now(), Duration::from_millis(7)); }); } + + #[test] + fn timeout_returns_future_output_before_deadline() { + let mut runtime = sim::Runtime::new(DstSeed(104)).unwrap(); + + let output = runtime.block_on(async { + super::timeout(Duration::from_millis(10), async { + super::sleep(Duration::from_millis(3)).await; + 9 + }) + .await + }); + + assert_eq!(output, Ok(9)); + assert_eq!(runtime.elapsed(), Duration::from_millis(3)); + } + + #[test] + fn timeout_expires_at_virtual_deadline() { + let mut runtime = sim::Runtime::new(DstSeed(105)).unwrap(); + + let output = runtime.block_on(async { + super::timeout(Duration::from_millis(4), async { + super::sleep(Duration::from_millis(20)).await; + 9 + }) + .await + }); + + assert_eq!(output.unwrap_err().duration(), Duration::from_millis(4)); + assert_eq!(runtime.elapsed(), Duration::from_millis(4)); + } } diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 020f0b02732..8277ba9edde 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -1,6 +1,6 @@ //! RelationalDB DST target with mocked commitlog file chaos and replay checks. 
-use std::{cell::Cell, collections::BTreeMap, io, num::NonZeroU64, ops::Bound, sync::Arc}; +use std::{cell::Cell, collections::BTreeMap, io, num::NonZeroU64, ops::Bound, sync::Arc, time::Duration}; use spacetimedb_commitlog::repo::{Memory as MemoryCommitlogRepo, SizeOnDisk}; use spacetimedb_core::{ @@ -12,7 +12,7 @@ use spacetimedb_datastore::{ execution_context::Workload, traits::{IsolationLevel, Program}, }; -use spacetimedb_durability::{Durability, EmptyHistory, Local}; +use spacetimedb_durability::{DirectLocal, Durability, EmptyHistory}; use spacetimedb_lib::{ db::auth::{StAccess, StTableType}, Identity, @@ -55,6 +55,8 @@ pub type RelationalDbCommitlogOutcome = CommitlogWorkloadOutcome; type RelationalDbCommitlogSource = crate::workload::commitlog_ops::CommitlogWorkloadSource; type RelationalDbCommitlogProperties = PropertyRuntime; +const DURABILITY_WAIT_TIMEOUT: Duration = Duration::from_secs(30); + pub async fn run_generated_with_config_and_scenario( seed: DstSeed, scenario: TableScenarioId, @@ -216,10 +218,8 @@ struct RelationalDbEngine { last_observed_durable_offset: Option, durability: Arc, durability_opts: spacetimedb_durability::local::Options, - runtime_handle: sim::RuntimeHandle, commitlog_repo: StressCommitlogRepo, stats: RunStats, - _runtime_guard: Option, } impl RelationalDbEngine { @@ -242,15 +242,11 @@ impl RelationalDbEngine { last_observed_durable_offset: None, durability: bootstrap.durability, durability_opts: bootstrap.durability_opts, - runtime_handle: bootstrap.runtime_handle, commitlog_repo: bootstrap.commitlog_repo, stats: RunStats { - runtime: RuntimeStats { - durability_actors_started: 1, - }, + runtime: RuntimeStats::default(), ..Default::default() }, - _runtime_guard: bootstrap.runtime_guard, }; this.install_base_schema().map_err(anyhow::Error::msg)?; this.refresh_observed_durable_offset(true).map_err(anyhow::Error::msg)?; @@ -354,7 +350,6 @@ impl RelationalDbEngine { let (durability, db) = 
self.reopen_from_history_with_fault_retry("close/reopen")?; - self.stats.runtime.durability_actors_started += 1; self.durability = durability; self.db = Some(db); self.rebuild_table_handles_after_reopen()?; @@ -384,18 +379,14 @@ impl RelationalDbEngine { fn reopen_from_history(&self) -> Result<(Arc, RelationalDB), String> { let durability = Arc::new( - InMemoryCommitlogDurability::open_with_repo( - self.commitlog_repo.clone(), - self.runtime_handle.clone(), - self.durability_opts, - ) - .map_err(|err| format!("reopen in-memory durability failed: {err}"))?, + InMemoryCommitlogDurability::open_with_repo(self.commitlog_repo.clone(), self.durability_opts) + .map_err(|err| format!("reopen in-memory durability failed: {err}"))?, ); let persistence = Persistence { durability: durability.clone(), disk_size: Arc::new(in_memory_size_on_disk), snapshots: None, - runtime: self.runtime_handle.clone(), + runtime: spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), }; let (db, connected_clients) = RelationalDB::open( Identity::ZERO, @@ -990,10 +981,15 @@ impl RelationalDbEngine { if let Some(target_offset) = self.last_requested_durable_offset { let current = self.durability.durable_tx_offset().last_seen(); if current.is_none_or(|offset| offset < target_offset) { - self.durability - .durable_tx_offset() - .wait_for(target_offset) + let mut durable_offset = self.durability.durable_tx_offset(); + sim::time::timeout(DURABILITY_WAIT_TIMEOUT, durable_offset.wait_for(target_offset)) .await + .map_err(|err| { + format!( + "durability wait for tx offset {target_offset} timed out after {:?}", + err.duration() + ) + })? 
.map_err(|err| format!("durability wait for tx offset {target_offset} failed: {err}"))?; } } else if forced { @@ -1239,7 +1235,6 @@ impl RelationalDbEngine { drop(old_db); let (durability, db) = self.reopen_from_history_with_fault_retry("final replay check")?; - self.stats.runtime.durability_actors_started += 1; self.durability = durability; self.db = Some(db); self.rebuild_table_handles_after_reopen()?; @@ -1385,35 +1380,32 @@ impl TargetEngine for RelationalDbEngine { } type StressCommitlogRepo = FaultableRepo; -type InMemoryCommitlogDurability = Local; +type InMemoryCommitlogDurability = DirectLocal; struct RelationalDbBootstrap { db: RelationalDB, - runtime_handle: sim::RuntimeHandle, commitlog_repo: StressCommitlogRepo, durability: Arc, durability_opts: spacetimedb_durability::local::Options, - runtime_guard: Option, } fn bootstrap_relational_db( seed: DstSeed, fault_profile: CommitlogFaultProfile, ) -> anyhow::Result { - let (runtime_handle, runtime_guard) = sim::current_handle_or_new_runtime()?; let fault_config = CommitlogFaultConfig::for_profile(fault_profile); let commitlog_repo = FaultableRepo::new(MemoryCommitlogRepo::new(8 * 1024 * 1024), fault_config, seed.fork(702)); let durability_opts = commitlog_stress_options(seed.fork(701)); let durability = Arc::new( - InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), runtime_handle.clone(), durability_opts) + InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), durability_opts) .map_err(|err| anyhow::anyhow!("open in-memory durability failed: {err}"))?, ); let persistence = Persistence { durability: durability.clone(), disk_size: Arc::new(in_memory_size_on_disk), snapshots: None, - runtime: runtime_handle.clone(), + runtime: spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), }; let (db, connected_clients) = RelationalDB::open( Identity::ZERO, @@ -1429,11 +1421,9 @@ fn bootstrap_relational_db( })?; Ok(RelationalDbBootstrap { db, - runtime_handle, commitlog_repo, 
durability, durability_opts, - runtime_guard, }) } diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs index 611acc28f94..8d5d6c584dd 100644 --- a/crates/dst/src/workload/commitlog_ops/generation.rs +++ b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -96,7 +96,7 @@ impl CommitlogWorkloadSource { }; self.pending.push_back(CommitlogInteraction::Table(base_op)); - if self.base.has_open_read_tx() { + if self.base.has_open_read_tx() || self.base.has_open_write_tx() { return true; } @@ -190,3 +190,87 @@ impl Iterator for CommitlogWorkloadSource { self.pull_next_interaction() } } + +#[cfg(test)] +mod tests { + use spacetimedb_sats::AlgebraicType; + + use crate::{ + client::SessionId, + schema::{ColumnPlan, SchemaPlan, TablePlan}, + seed::{DstRng, DstSeed}, + workload::{ + commitlog_ops::CommitlogInteraction, + table_ops::{ScenarioPlanner, TableOperation, TableScenario, TableWorkloadInteraction}, + }, + }; + + use super::{CommitlogWorkloadProfile, CommitlogWorkloadSource}; + + #[derive(Clone)] + struct BeginThenCommitScenario; + + impl TableScenario for BeginThenCommitScenario { + fn generate_schema(&self, _rng: &mut DstRng) -> SchemaPlan { + SchemaPlan { + tables: vec![TablePlan { + name: "test_table".to_string(), + columns: vec![ColumnPlan { + name: "id".to_string(), + ty: AlgebraicType::U64, + }], + extra_indexes: vec![], + }], + } + } + + fn validate_outcome( + &self, + _schema: &SchemaPlan, + _outcome: &crate::workload::table_ops::TableWorkloadOutcome, + ) -> anyhow::Result<()> { + Ok(()) + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + if planner.active_writer() == Some(conn) { + planner.commit_tx(conn); + planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); + } else { + planner.begin_tx(conn); + planner.push_interaction(TableWorkloadInteraction::begin_tx(conn)); + } + } + } + + #[test] + fn 
lifecycle_interactions_wait_for_open_write_tx_to_close() { + let scenario = BeginThenCommitScenario; + let mut rng = DstSeed(1).rng(); + let schema = scenario.generate_schema(&mut rng); + let profile = CommitlogWorkloadProfile { + close_reopen_pct: 100, + create_dynamic_table_pct: 100, + migrate_after_create_pct: 100, + migrate_dynamic_table_pct: 100, + drop_dynamic_table_pct: 100, + }; + let mut source = CommitlogWorkloadSource::with_profile(DstSeed(10), scenario, schema, 1, 2, profile); + + assert!(matches!( + source.next(), + Some(CommitlogInteraction::Table(TableWorkloadInteraction { + op: TableOperation::BeginTx { .. }, + .. + })) + )); + assert!(matches!( + source.next(), + Some(CommitlogInteraction::Table(TableWorkloadInteraction { + op: TableOperation::CommitTx { .. }, + .. + })) + )); + assert!(matches!(source.next(), Some(CommitlogInteraction::CloseReopen))); + } +} diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index 4c3da2e236b..7ee5ba00601 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -225,6 +225,10 @@ impl TableWorkloadSource { self.model.any_read_tx() } + pub fn has_open_write_tx(&self) -> bool { + self.model.active_writer().is_some() + } + fn fill_pending(&mut self) { if self.emitted >= self.target_interactions { // Once the workload budget is spent, stop asking the scenario for diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs index facf8a92734..f75470bf56a 100644 --- a/crates/dst/src/workload/table_ops/mod.rs +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -6,6 +6,8 @@ mod scenarios; pub(crate) mod strategies; mod types; +#[cfg(test)] +pub(crate) use generation::ScenarioPlanner; pub(crate) use generation::TableWorkloadSource; pub(crate) use model::{PredictedOutcome, TableOracle}; pub use scenarios::TableScenarioId; diff --git 
a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs index cf9412b1c4c..41a007c9930 100644 --- a/crates/dst/src/workload/table_ops/model.rs +++ b/crates/dst/src/workload/table_ops/model.rs @@ -443,7 +443,7 @@ impl TableOracle { { return Ok(Some(PredictedOutcome::Error { kind: TableErrorKind::WriteConflict, - subject: Some((conn, table)), + subject: None, })); } Ok(None) @@ -606,3 +606,99 @@ fn bound_contains_upper(bound: &Bound, key: &AlgebraicValue) -> Bound::Unbounded => true, } } + +#[cfg(test)] +mod tests { + use spacetimedb_sats::AlgebraicValue; + + use crate::{client::SessionId, schema::SimRow}; + + use super::{PredictedOutcome, TableErrorKind, TableOperation, TableOracle}; + + fn row(id: u64) -> SimRow { + SimRow { + values: vec![AlgebraicValue::U64(id)], + } + } + + #[test] + fn write_conflict_prediction_does_not_request_blocking_visibility_check() { + let owner = SessionId::from_index(0); + let contender = SessionId::from_index(1); + let mut oracle = TableOracle::new(1, 2); + oracle.apply(&TableOperation::BeginTx { conn: owner }); + + let prediction = oracle + .predict(&TableOperation::InsertRows { + conn: contender, + table: 0, + rows: vec![row(1)], + }) + .unwrap(); + + assert_eq!( + prediction, + PredictedOutcome::Error { + kind: TableErrorKind::WriteConflict, + subject: None, + } + ); + } + + #[test] + fn exact_duplicate_insert_is_predicted_as_no_mutation() { + let conn = SessionId::from_index(0); + let mut oracle = TableOracle::new(1, 1); + oracle.apply(&TableOperation::InsertRows { + conn, + table: 0, + rows: vec![row(1)], + }); + + let prediction = oracle + .predict(&TableOperation::InsertRows { + conn, + table: 0, + rows: vec![row(1)], + }) + .unwrap(); + + assert_eq!( + prediction, + PredictedOutcome::NoMutation { + subject: Some((conn, 0)), + } + ); + } + + #[test] + fn same_id_different_row_is_predicted_as_unique_constraint_violation() { + let conn = SessionId::from_index(0); + let mut oracle = 
TableOracle::new(1, 1); + oracle.apply(&TableOperation::InsertRows { + conn, + table: 0, + rows: vec![SimRow { + values: vec![AlgebraicValue::U64(1), AlgebraicValue::U64(10)], + }], + }); + + let prediction = oracle + .predict(&TableOperation::InsertRows { + conn, + table: 0, + rows: vec![SimRow { + values: vec![AlgebraicValue::U64(1), AlgebraicValue::U64(11)], + }], + }) + .unwrap(); + + assert_eq!( + prediction, + PredictedOutcome::Error { + kind: TableErrorKind::UniqueConstraintViolation, + subject: Some((conn, 0)), + } + ); + } +} diff --git a/crates/dst/src/workload/table_ops/scenarios/banking.rs b/crates/dst/src/workload/table_ops/scenarios/banking.rs index 1a4db5febbb..534f8ca504c 100644 --- a/crates/dst/src/workload/table_ops/scenarios/banking.rs +++ b/crates/dst/src/workload/table_ops/scenarios/banking.rs @@ -76,6 +76,11 @@ pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { let debit_rows = planner.visible_rows(conn, 0); let choose_insert = debit_rows.is_empty() || planner.roll_percent(65); + let wrap_pair_in_tx = planner.active_writer().is_none(); + if wrap_pair_in_tx { + planner.begin_tx(conn); + planner.push_interaction(TableWorkloadInteraction::begin_tx(conn)); + } if choose_insert { let row = planner.make_row(0); let mirror = row.clone(); @@ -83,6 +88,10 @@ pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { planner.insert(conn, 1, mirror.clone()); planner.push_interaction(TableWorkloadInteraction::insert(conn, 0, row.clone())); planner.push_interaction(TableWorkloadInteraction::insert(conn, 1, mirror.clone())); + if wrap_pair_in_tx { + planner.commit_tx(conn); + planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); + } return; } @@ -92,4 +101,8 @@ pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { planner.delete(conn, 1, mirror.clone()); planner.push_interaction(TableWorkloadInteraction::delete(conn, 0, row.clone())); 
planner.push_interaction(TableWorkloadInteraction::delete(conn, 1, mirror.clone())); + if wrap_pair_in_tx { + planner.commit_tx(conn); + planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); + } } diff --git a/crates/durability/src/imp/mod.rs b/crates/durability/src/imp/mod.rs index 3e00ae21ee1..4811f340b5a 100644 --- a/crates/durability/src/imp/mod.rs +++ b/crates/durability/src/imp/mod.rs @@ -2,20 +2,28 @@ pub mod local; pub use local::Local; #[cfg(any(test, feature = "test"))] -pub use testing::NoDurability; +pub use testing::{DirectLocal, NoDurability}; #[cfg(any(test, feature = "test"))] mod testing { use std::{ future, marker::PhantomData, - sync::atomic::{AtomicBool, Ordering}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Mutex, + }, }; use futures::FutureExt as _; + use spacetimedb_commitlog::{ + payload::Txdata, + repo::{Repo, RepoWithoutLockFile}, + Commitlog, Encode, + }; use tokio::sync::watch; - use crate::{Close, Durability, DurableOffset, PreparedTx, TxOffset}; + use crate::{local, Close, Durability, DurableOffset, History, PreparedTx, TxOffset}; /// A [`Durability`] impl that sends all transactions into the void. /// @@ -56,4 +64,118 @@ mod testing { future::ready(*self.durable_offset.borrow()).boxed() } } + + /// A commitlog-backed durability implementation that performs writes inline. + /// + /// This is intended for deterministic tests that want to inject their own + /// execution model instead of using [`local::Local`]'s Tokio actor. 
+ pub struct DirectLocal + where + R: Repo, + { + clog: Arc, R>>, + durable_offset: watch::Sender>, + closed: AtomicBool, + write_lock: Mutex<()>, + } + + impl DirectLocal + where + T: Encode + Send + Sync + 'static, + R: RepoWithoutLockFile + Send + Sync + 'static, + { + pub fn open_with_repo(repo: R, opts: local::Options) -> Result { + let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); + let (durable_offset, _) = watch::channel(clog.max_committed_offset()); + Ok(Self { + clog, + durable_offset, + closed: AtomicBool::new(false), + write_lock: Mutex::new(()), + }) + } + + pub fn as_history(&self) -> impl History> + use { + self.clog.clone() + } + } + + impl DirectLocal + where + T: Encode + Send + Sync + 'static, + R: Repo + Send + Sync + 'static, + { + fn flush_and_publish(&self) -> Option { + let offset = self + .clog + .flush_and_sync() + .expect("direct local durability: commitlog flush-and-sync failed"); + if let Some(offset) = offset { + self.durable_offset.send_modify(|val| { + val.replace(offset); + }); + } + self.durable_offset.borrow().as_ref().copied() + } + } + + impl Durability for DirectLocal + where + T: Encode + Send + Sync + 'static, + R: Repo + Send + Sync + 'static, + { + type TxData = Txdata; + + fn append_tx(&self, tx: PreparedTx) { + if self.closed.load(Ordering::Relaxed) { + panic!("`close` was called on this `DirectLocal` instance"); + } + let _guard = self.write_lock.lock().expect("direct local durability lock poisoned"); + self.clog + .commit([tx.into_transaction()]) + .expect("direct local durability: commitlog write failed"); + self.flush_and_publish(); + } + + fn durable_tx_offset(&self) -> DurableOffset { + self.durable_offset.subscribe().into() + } + + fn close(&self) -> Close { + self.closed.store(true, Ordering::Relaxed); + let _guard = self.write_lock.lock().expect("direct local durability lock poisoned"); + future::ready(self.flush_and_publish()).boxed() + } + } + + #[cfg(test)] + mod tests { + use 
futures::FutureExt as _; + use spacetimedb_commitlog::repo::Memory; + use spacetimedb_sats::ProductValue; + + use super::*; + use crate::{Durability, Transaction}; + + #[test] + fn direct_local_publishes_durable_offset_inline() { + let durability = DirectLocal::::open_with_repo( + Memory::new(1024 * 1024), + local::Options::default(), + ) + .unwrap(); + + durability.append_tx(Box::new(Transaction { + offset: 0, + txdata: Txdata { + inputs: None, + outputs: None, + mutations: None, + }, + })); + + assert_eq!(durability.durable_tx_offset().last_seen(), Some(0)); + assert_eq!(durability.close().now_or_never().flatten(), Some(0)); + } + } } diff --git a/crates/io/Cargo.toml b/crates/io/Cargo.toml deleted file mode 100644 index e6cfc9a14f5..00000000000 --- a/crates/io/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "spacetimedb-io" -version.workspace = true -edition.workspace = true -rust-version.workspace = true -license-file = "LICENSE" -description = "Filesystem and network IO facade for SpacetimeDB crates" - -[dependencies] -tokio.workspace = true - -[lints] -workspace = true diff --git a/crates/io/build.rs b/crates/io/build.rs deleted file mode 100644 index 3982c077afc..00000000000 --- a/crates/io/build.rs +++ /dev/null @@ -1,10 +0,0 @@ -fn main() { - println!("cargo:rerun-if-env-changed=CARGO_CFG_MADSIM"); - println!("cargo:rerun-if-env-changed=CARGO_CFG_SIMULATION"); - println!("cargo:rerun-if-env-changed=CARGO_ENCODED_RUSTFLAGS"); - println!("cargo:rerun-if-env-changed=RUSTFLAGS"); - - if std::env::var_os("CARGO_CFG_MADSIM").is_some() { - println!("cargo:rustc-cfg=simulation"); - } -} diff --git a/crates/io/src/lib.rs b/crates/io/src/lib.rs deleted file mode 100644 index f00cdf90b3f..00000000000 --- a/crates/io/src/lib.rs +++ /dev/null @@ -1,73 +0,0 @@ -//! Narrow facade for SpacetimeDB-owned async IO boundaries. -//! -//! This crate currently re-exports the Tokio filesystem, IO, and network APIs -//! 
that SpacetimeDB code is allowed to depend on directly. -//! -//! This crate is intentionally small. It is a migration point for filesystem and -//! network APIs reached by deterministic simulation tests, not a general runtime -//! abstraction for tasks, clocks, blocking work, or shutdown. - -pub mod fs { - pub use tokio::fs::*; - - #[cfg(simulation)] - use std::{ - io::{self, Read as _}, - pin::Pin, - task::{Context, Poll}, - }; - - /// Async reader type returned by [`file_from_std`]. - #[cfg(not(simulation))] - pub type FileFromStd = tokio::fs::File; - - /// Async reader type returned by [`file_from_std`]. - #[cfg(simulation)] - pub type FileFromStd = StdFileAsyncReader; - - /// Convert a standard file handle into an async reader. - /// - /// Tokio supports this directly. The simulated filesystem type does not - /// wrap existing OS files, so simulation builds use a small `AsyncRead` - /// adapter for call sites that only need to stream an already-opened std - /// file. - #[cfg(not(simulation))] - pub fn file_from_std(file: std::fs::File) -> FileFromStd { - tokio::fs::File::from_std(file) - } - - /// Convert a standard file handle into an async reader. - #[cfg(simulation)] - pub fn file_from_std(file: std::fs::File) -> FileFromStd { - StdFileAsyncReader(file) - } - - /// Async-read adapter for standard files in simulation builds. 
- #[cfg(simulation)] - pub struct StdFileAsyncReader(std::fs::File); - - #[cfg(simulation)] - impl tokio::io::AsyncRead for StdFileAsyncReader { - fn poll_read( - mut self: Pin<&mut Self>, - _cx: &mut Context<'_>, - buf: &mut tokio::io::ReadBuf<'_>, - ) -> Poll> { - match self.0.read(buf.initialize_unfilled()) { - Ok(n) => { - buf.advance(n); - Poll::Ready(Ok(())) - } - Err(e) => Poll::Ready(Err(e)), - } - } - } -} - -pub mod io { - pub use tokio::io::*; -} - -pub mod net { - pub use tokio::net::*; -} diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml new file mode 100644 index 00000000000..6f62e0e6b08 --- /dev/null +++ b/crates/runtime/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "spacetimedb-runtime" +version.workspace = true +edition.workspace = true +license-file = "LICENSE" +description = "Runtime and deterministic simulation utilities for SpacetimeDB" +rust-version.workspace = true + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +futures.workspace = true +futures-util.workspace = true +tokio = { workspace = true, optional = true } +async-task = { version = "4.4", optional = true } +libc = { version = "0.2", optional = true } +tracing = { workspace = true, optional = true } + +[features] +default = ["tokio"] +tokio = ["dep:tokio"] +simulation = ["dep:async-task", "dep:libc", "dep:tracing"] diff --git a/crates/runtime/LICENSE b/crates/runtime/LICENSE new file mode 100644 index 00000000000..daef5135277 --- /dev/null +++ b/crates/runtime/LICENSE @@ -0,0 +1,731 @@ +SPACETIMEDB BUSINESS SOURCE LICENSE AGREEMENT + +Business Source License 1.1 + +Parameters + +Licensor: Clockwork Laboratories, Inc. +Licensed Work: SpacetimeDB 2.2.0 + The Licensed Work is + (c) 2023 Clockwork Laboratories, Inc. 
+ +Additional Use Grant: You may make use of the Licensed Work provided your + application or service uses the Licensed Work with no + more than one SpacetimeDB instance in production and + provided that you do not use the Licensed Work for a + Database Service. + + A “Database Service” is a commercial offering that + allows third parties (other than your employees and + contractors) to access the functionality of the + Licensed Work by creating tables whose schemas are + controlled by such third parties. + +Change Date: 2031-04-29 + +Change License: GNU Affero General Public License v3.0 with a linking + exception + +For information about alternative licensing arrangements for the Software, +please visit: https://spacetimedb.com + +Notice + +The Business Source License (this document, or the “License”) is not an Open +Source license. However, the Licensed Work will eventually be made available +under an Open Source License, as stated in this License. + +License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved. +“Business Source License” is a trademark of MariaDB Corporation Ab. + +----------------------------------------------------------------------------- + +Business Source License 1.1 + +Terms + +The Licensor hereby grants you the right to copy, modify, create derivative +works, redistribute, and make non-production use of the Licensed Work. The +Licensor may make an Additional Use Grant, above, permitting limited +production use. + +Effective on the Change Date, or the fourth anniversary of the first publicly +available distribution of a specific version of the Licensed Work under this +License, whichever comes first, the Licensor hereby grants you rights under +the terms of the Change License, and the rights granted in the paragraph +above terminate. 
+ +If your use of the Licensed Work does not comply with the requirements +currently in effect as described in this License, you must purchase a +commercial license from the Licensor, its affiliated entities, or authorized +resellers, or you must refrain from using the Licensed Work. + +All copies of the original and modified Licensed Work, and derivative works +of the Licensed Work, are subject to this License. This License applies +separately for each version of the Licensed Work and the Change Date may vary +for each version of the Licensed Work released by Licensor. + +You must conspicuously display this License on each original or modified copy +of the Licensed Work. If you receive the Licensed Work in original or +modified form from a third party, the terms and conditions set forth in this +License apply to your use of that work. + +Any use of the Licensed Work in violation of this License will automatically +terminate your rights under this License for the current and all other +versions of the Licensed Work. + +This License does not grant you any right in any trademark or logo of +Licensor or its affiliates (provided that you may use a trademark or logo of +Licensor as expressly required by this License). + +TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +TITLE. + +MariaDB hereby grants you permission to use this License’s text to license +your works, and to refer to it using the trademark “Business Source License”, +as long as you comply with the Covenants of Licensor below. 
+ +Covenants of Licensor + +In consideration of the right to use this License’s text and the “Business +Source License” name and trademark, Licensor covenants to MariaDB, and to all +other recipients of the licensed work to be provided by Licensor: + +1. To specify as the Change License the GPL Version 2.0 or any later version, + or a license that is compatible with GPL Version 2.0 or a later version, + where “compatible” means that software provided under the Change License can + be included in a program with software provided under GPL Version 2.0 or a + later version. Licensor may specify additional Change Licenses without + limitation. + +2. To either: (a) specify an additional grant of rights to use that does not + impose any additional restriction on the right granted in this License, as + the Additional Use Grant; or (b) insert the text “None”. + +3. To specify a Change Date. + +4. Not to modify this License in any other way. + +----------------------------------------------------------------------------- + +Copyright (C) 2023 Clockwork Laboratories, Inc. + +This program is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License, version 3, as published +by the Free Software Foundation. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU Affero General Public License +along with this program; if not, see <https://www.gnu.org/licenses/>. + +Additional permission under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or combining it +with SpacetimeDB (or a modified version of that library), containing parts +covered by the terms of the AGPL v3.0, the licensors of this Program grant +you additional permission to convey the resulting work.
+ +Additional permission under GNU AGPL version 3 section 13 + +If you modify this Program, or any covered work, by linking or combining it +with SpacetimeDB (or a modified version of that library), containing parts +covered by the terms of the AGPL v3.0, the licensors of this Program grant +you additional permission that, notwithstanding any other provision of this +License, you need not prominently offer all users interacting with your +modified version remotely through a computer network an opportunity to +receive the Corresponding Source of your version from a network server at no +charge, if your version supports such interaction. This permission does not +waive or modify any other obligations or terms of the AGPL v3.0, except for +the specific requirement set forth in section 13. + +A copy of the AGPL v3.0 license is reproduced below. + + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + +Copyright © 2007 Free Software Foundation, Inc. +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble +The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + +The licenses for most software and other practical works are designed to take +away your freedom to share and change the works. By contrast, our General +Public Licenses are intended to guarantee your freedom to share and change +all versions of a program--to make sure it remains free software for all its +users. + +When we speak of free software, we are referring to freedom, not price. 
Our +General Public Licenses are designed to make sure that you have the freedom +to distribute copies of free software (and charge for them if you wish), that +you receive source code or can get it if you want it, that you can change the +software or use pieces of it in new free programs, and that you know you can +do these things. + +Developers that use our General Public Licenses protect your rights with two +steps: (1) assert copyright on the software, and (2) offer you this License +which gives you legal permission to copy, distribute and/or modify the +software. + +A secondary benefit of defending all users' freedom is that improvements made +in alternate versions of the program, if they receive widespread use, become +available for other developers to incorporate. Many developers of free +software are heartened and encouraged by the resulting cooperation. However, +in the case of software used on network servers, this result may fail to come +about. The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its source +code to the public. + +The GNU Affero General Public License is designed specifically to ensure +that, in such cases, the modified source code becomes available to the +community. It requires the operator of a network server to provide the source +code of the modified version running there to the users of that server. +Therefore, public use of a modified version, on a publicly accessible server, +gives the public access to the source code of the modified version. + +An older license, called the Affero General Public License and published by +Affero, was designed to accomplish similar goals. This is a different +license, not a version of the Affero GPL, but Affero has released a new +version of the Affero GPL which permits relicensing under this license. + +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS +0. 
Definitions. +"This License" refers to version 3 of the GNU Affero General Public License. + +"Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + +"The Program" refers to any copyrightable work licensed under this License. +Each licensee is addressed as "you". "Licensees" and "recipients" may be +individuals or organizations. + +To "modify" a work means to copy from or adapt all or part of the work in a +fashion requiring copyright permission, other than the making of an exact +copy. The resulting work is called a "modified version" of the earlier work +or a work "based on" the earlier work. + +A "covered work" means either the unmodified Program or a work based on the +Program. + +To "propagate" a work means to do anything with it that, without permission, +would make you directly or secondarily liable for infringement under +applicable copyright law, except executing it on a computer or modifying a +private copy. Propagation includes copying, distribution (with or without +modification), making available to the public, and in some countries other +activities as well. + +To "convey" a work means any kind of propagation that enables other parties +to make or receive copies. Mere interaction with a user through a computer +network, with no transfer of a copy, is not conveying. + +An interactive user interface displays "Appropriate Legal Notices" to the +extent that it includes a convenient and prominently visible feature that (1) +displays an appropriate copyright notice, and (2) tells the user that there +is no warranty for the work (except to the extent that warranties are +provided), that licensees may convey the work under this License, and how to +view a copy of this License. If the interface presents a list of user +commands or options, such as a menu, a prominent item in the list meets this +criterion. + +1. Source Code. 
+The "source code" for a work means the preferred form of the work for making +modifications to it. "Object code" means any non-source form of a work. + +A "Standard Interface" means an interface that either is an official standard +defined by a recognized standards body, or, in the case of interfaces +specified for a particular programming language, one that is widely used +among developers working in that language. + +The "System Libraries" of an executable work include anything, other than the +work as a whole, that (a) is included in the normal form of packaging a Major +Component, but which is not part of that Major Component, and (b) serves only +to enable use of the work with that Major Component, or to implement a +Standard Interface for which an implementation is available to the public in +source code form. A "Major Component", in this context, means a major +essential component (kernel, window system, and so on) of the specific +operating system (if any) on which the executable work runs, or a compiler +used to produce the work, or an object code interpreter used to run it. + +The "Corresponding Source" for a work in object code form means all the +source code needed to generate, install, and (for an executable work) run the +object code and to modify the work, including scripts to control those +activities. However, it does not include the work's System Libraries, or +general-purpose tools or generally available free programs which are used +unmodified in performing those activities but which are not part of the work. +For example, Corresponding Source includes interface definition files +associated with source files for the work, and the source code for shared +libraries and dynamically linked subprograms that the work is specifically +designed to require, such as by intimate data communication or control flow +between those subprograms and other parts of the work. 
+ +The Corresponding Source need not include anything that users can regenerate +automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same work. + +2. Basic Permissions. +All rights granted under this License are granted for the term of copyright +on the Program, and are irrevocable provided the stated conditions are met. +This License explicitly affirms your unlimited permission to run the +unmodified Program. The output from running a covered work is covered by this +License only if the output, given its content, constitutes a covered work. +This License acknowledges your rights of fair use or other equivalent, as +provided by copyright law. + +You may make, run and propagate covered works that you do not convey, without +conditions so long as your license otherwise remains in force. You may convey +covered works to others for the sole purpose of having them make +modifications exclusively for you, or provide you with facilities for running +those works, provided that you comply with the terms of this License in +conveying all material for which you do not control copyright. Those thus +making or running the covered works for you must do so exclusively on your +behalf, under your direction and control, on terms that prohibit them from +making any copies of your copyrighted material outside their relationship +with you. + +Conveying under any other circumstances is permitted solely under the +conditions stated below. Sublicensing is not allowed; section 10 makes it +unnecessary. + +3. Protecting Users' Legal Rights From Anti-Circumvention Law. +No covered work shall be deemed part of an effective technological measure +under any applicable law fulfilling obligations under article 11 of the WIPO +copyright treaty adopted on 20 December 1996, or similar laws prohibiting or +restricting circumvention of such measures. 
+ +When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention is +effected by exercising rights under this License with respect to the covered +work, and you disclaim any intention to limit operation or modification of +the work as a means of enforcing, against the work's users, your or third +parties' legal rights to forbid circumvention of technological measures. + +4. Conveying Verbatim Copies. +You may convey verbatim copies of the Program's source code as you receive +it, in any medium, provided that you conspicuously and appropriately publish +on each copy an appropriate copyright notice; keep intact all notices stating +that this License and any non-permissive terms added in accord with section 7 +apply to the code; keep intact all notices of the absence of any warranty; +and give all recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, and you +may offer support or warranty protection for a fee. + +5. Conveying Modified Source Versions. +You may convey a work based on the Program, or the modifications to produce +it from the Program, in the form of source code under the terms of section 4, +provided that you also meet all of these conditions: + +a) The work must carry prominent notices stating that you modified it, and +giving a relevant date. +b) The work must carry prominent notices stating that it is released under +this License and any conditions added under section 7. This requirement +modifies the requirement in section 4 to "keep intact all notices". +c) You must license the entire work, as a whole, under this License to anyone +who comes into possession of a copy. This License will therefore apply, along +with any applicable section 7 additional terms, to the whole of the work, and +all its parts, regardless of how they are packaged. 
This License gives no +permission to license the work in any other way, but it does not invalidate +such permission if you have separately received it. +d) If the work has interactive user interfaces, each must display Appropriate +Legal Notices; however, if the Program has interactive interfaces that do not +display Appropriate Legal Notices, your work need not make them do so. +A compilation of a covered work with other separate and independent works, +which are not by their nature extensions of the covered work, and which are +not combined with it such as to form a larger program, in or on a volume of a +storage or distribution medium, is called an "aggregate" if the compilation +and its resulting copyright are not used to limit the access or legal rights +of the compilation's users beyond what the individual works permit. Inclusion +of a covered work in an aggregate does not cause this License to apply to the +other parts of the aggregate. + +6. Conveying Non-Source Forms. +You may convey a covered work in object code form under the terms of sections +4 and 5, provided that you also convey the machine-readable Corresponding +Source under the terms of this License, in one of these ways: + +a) Convey the object code in, or embodied in, a physical product (including a +physical distribution medium), accompanied by the Corresponding Source fixed +on a durable physical medium customarily used for software interchange. 
+b) Convey the object code in, or embodied in, a physical product (including a +physical distribution medium), accompanied by a written offer, valid for at +least three years and valid for as long as you offer spare parts or customer +support for that product model, to give anyone who possesses the object code +either (1) a copy of the Corresponding Source for all the software in the +product that is covered by this License, on a durable physical medium +customarily used for software interchange, for a price no more than your +reasonable cost of physically performing this conveying of source, or (2) +access to copy the Corresponding Source from a network server at no charge. +c) Convey individual copies of the object code with a copy of the written +offer to provide the Corresponding Source. This alternative is allowed only +occasionally and noncommercially, and only if you received the object code +with such an offer, in accord with subsection 6b. +d) Convey the object code by offering access from a designated place (gratis +or for a charge), and offer equivalent access to the Corresponding Source in +the same way through the same place at no further charge. You need not +require recipients to copy the Corresponding Source along with the object +code. If the place to copy the object code is a network server, the +Corresponding Source may be on a different server (operated by you or a third +party) that supports equivalent copying facilities, provided you maintain +clear directions next to the object code saying where to find the +Corresponding Source. Regardless of what server hosts the Corresponding +Source, you remain obligated to ensure that it is available for as long as +needed to satisfy these requirements. +e) Convey the object code using peer-to-peer transmission, provided you +inform other peers where the object code and Corresponding Source of the work +are being offered to the general public at no charge under subsection 6d. 
+A separable portion of the object code, whose source code is excluded from +the Corresponding Source as a System Library, need not be included in +conveying the object code work. + +A "User Product" is either (1) a "consumer product", which means any tangible +personal property which is normally used for personal, family, or household +purposes, or (2) anything designed or sold for incorporation into a dwelling. +In determining whether a product is a consumer product, doubtful cases shall +be resolved in favor of coverage. For a particular product received by a +particular user, "normally used" refers to a typical or common use of that +class of product, regardless of the status of the particular user or of the +way in which the particular user actually uses, or expects or is expected to +use, the product. A product is a consumer product regardless of whether the +product has substantial commercial, industrial or non-consumer uses, unless +such uses represent the only significant mode of use of the product. + +"Installation Information" for a User Product means any methods, procedures, +authorization keys, or other information required to install and execute +modified versions of a covered work in that User Product from a modified +version of its Corresponding Source. The information must suffice to ensure +that the continued functioning of the modified object code is in no case +prevented or interfered with solely because modification has been made. + +If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as part of +a transaction in which the right of possession and use of the User Product is +transferred to the recipient in perpetuity or for a fixed term (regardless of +how the transaction is characterized), the Corresponding Source conveyed +under this section must be accompanied by the Installation Information. 
But +this requirement does not apply if neither you nor any third party retains +the ability to install modified object code on the User Product (for example, +the work has been installed in ROM). + +The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates for +a work that has been modified or installed by the recipient, or for the User +Product in which it has been modified or installed. Access to a network may +be denied when the modification itself materially and adversely affects the +operation of the network or violates the rules and protocols for +communication across the network. + +Corresponding Source conveyed, and Installation Information provided, in +accord with this section must be in a format that is publicly documented (and +with an implementation available to the public in source code form), and must +require no special password or key for unpacking, reading or copying. + +7. Additional Terms. +"Additional permissions" are terms that supplement the terms of this License +by making exceptions from one or more of its conditions. Additional +permissions that are applicable to the entire Program shall be treated as +though they were included in this License, to the extent that they are valid +under applicable law. If additional permissions apply only to part of the +Program, that part may be used separately under those permissions, but the +entire Program remains governed by this License without regard to the +additional permissions. + +When you convey a copy of a covered work, you may at your option remove any +additional permissions from that copy, or from any part of it. (Additional +permissions may be written to require their own removal in certain cases when +you modify the work.) You may place additional permissions on material, added +by you to a covered work, for which you have or can give appropriate +copyright permission. 
+ +Notwithstanding any other provision of this License, for material you add to +a covered work, you may (if authorized by the copyright holders of that +material) supplement the terms of this License with terms: + +a) Disclaiming warranty or limiting liability differently from the terms of +sections 15 and 16 of this License; or +b) Requiring preservation of specified reasonable legal notices or author +attributions in that material or in the Appropriate Legal Notices displayed +by works containing it; or +c) Prohibiting misrepresentation of the origin of that material, or requiring +that modified versions of such material be marked in reasonable ways as +different from the original version; or +d) Limiting the use for publicity purposes of names of licensors or authors +of the material; or +e) Declining to grant rights under trademark law for use of some trade names, +trademarks, or service marks; or +f) Requiring indemnification of licensors and authors of that material by +anyone who conveys the material (or modified versions of it) with contractual +assumptions of liability to the recipient, for any liability that these +contractual assumptions directly impose on those licensors and authors. +All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is governed +by this License along with a term that is a further restriction, you may +remove that term. If a license document contains a further restriction but +permits relicensing or conveying under this License, you may add to a covered +work material governed by the terms of that license document, provided that +the further restriction does not survive such relicensing or conveying. 
+ +If you add terms to a covered work in accord with this section, you must +place, in the relevant source files, a statement of the additional terms that +apply to those files, or a notice indicating where to find the applicable +terms. + +Additional terms, permissive or non-permissive, may be stated in the form of +a separately written license, or stated as exceptions; the above requirements +apply either way. + +8. Termination. +You may not propagate or modify a covered work except as expressly provided +under this License. Any attempt otherwise to propagate or modify it is void, +and will automatically terminate your rights under this License (including +any patent licenses granted under the third paragraph of section 11). + +However, if you cease all violation of this License, then your license from a +particular copyright holder is reinstated (a) provisionally, unless and until +the copyright holder explicitly and finally terminates your license, and (b) +permanently, if the copyright holder fails to notify you of the violation by +some reasonable means prior to 60 days after the cessation. + +Moreover, your license from a particular copyright holder is reinstated +permanently if the copyright holder notifies you of the violation by some +reasonable means, this is the first time you have received notice of +violation of this License (for any work) from that copyright holder, and you +cure the violation prior to 30 days after your receipt of the notice. + +Termination of your rights under this section does not terminate the licenses +of parties who have received copies or rights from you under this License. If +your rights have been terminated and not permanently reinstated, you do not +qualify to receive new licenses for the same material under section 10. + +9. Acceptance Not Required for Having Copies. +You are not required to accept this License in order to receive or run a copy +of the Program. 
Ancillary propagation of a covered work occurring solely as a +consequence of using peer-to-peer transmission to receive a copy likewise +does not require acceptance. However, nothing other than this License grants +you permission to propagate or modify any covered work. These actions +infringe copyright if you do not accept this License. Therefore, by modifying +or propagating a covered work, you indicate your acceptance of this License +to do so. + +10. Automatic Licensing of Downstream Recipients. +Each time you convey a covered work, the recipient automatically receives a +license from the original licensors, to run, modify and propagate that work, +subject to this License. You are not responsible for enforcing compliance by +third parties with this License. + +An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered work +results from an entity transaction, each party to that transaction who +receives a copy of the work also receives whatever licenses to the work the +party's predecessor in interest had or could give under the previous +paragraph, plus a right to possession of the Corresponding Source of the work +from the predecessor in interest, if the predecessor has it or can get it +with reasonable efforts. + +You may not impose any further restrictions on the exercise of the rights +granted or affirmed under this License. For example, you may not impose a +license fee, royalty, or other charge for exercise of rights granted under +this License, and you may not initiate litigation (including a cross-claim or +counterclaim in a lawsuit) alleging that any patent claim is infringed by +making, using, selling, offering for sale, or importing the Program or any +portion of it. + +11. Patents. 
+A "contributor" is a copyright holder who authorizes use under this License +of the Program or a work on which the Program is based. The work thus +licensed is called the contributor's "contributor version". + +A contributor's "essential patent claims" are all patent claims owned or +controlled by the contributor, whether already acquired or hereafter +acquired, that would be infringed by some manner, permitted by this License, +of making, using, or selling its contributor version, but do not include +claims that would be infringed only as a consequence of further modification +of the contributor version. For purposes of this definition, "control" +includes the right to grant patent sublicenses in a manner consistent with +the requirements of this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free patent +license under the contributor's essential patent claims, to make, use, sell, +offer for sale, import and otherwise run, modify and propagate the contents +of its contributor version. + +In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent (such +as an express permission to practice a patent or covenant not to sue for +patent infringement). To "grant" such a patent license to a party means to +make such an agreement or commitment not to enforce a patent against the +party. 
+ +If you convey a covered work, knowingly relying on a patent license, and the +Corresponding Source of the work is not available for anyone to copy, free of +charge and under the terms of this License, through a publicly available +network server or other readily accessible means, then you must either (1) +cause the Corresponding Source to be so available, or (2) arrange to deprive +yourself of the benefit of the patent license for this particular work, or +(3) arrange, in a manner consistent with the requirements of this License, to +extend the patent license to downstream recipients. "Knowingly relying" means +you have actual knowledge that, but for the patent license, your conveying +the covered work in a country, or your recipient's use of the covered work in +a country, would infringe one or more identifiable patents in that country +that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or arrangement, +you convey, or propagate by procuring conveyance of, a covered work, and +grant a patent license to some of the parties receiving the covered work +authorizing them to use, propagate, modify or convey a specific copy of the +covered work, then the patent license you grant is automatically extended to +all recipients of the covered work and works based on it. + +A patent license is "discriminatory" if it does not include within the scope +of its coverage, prohibits the exercise of, or is conditioned on the +non-exercise of one or more of the rights that are specifically granted under +this License. 
You may not convey a covered work if you are a party to an +arrangement with a third party that is in the business of distributing +software, under which you make payment to the third party based on the extent +of your activity of conveying the work, and under which the third party +grants, to any of the parties who would receive the covered work from you, a +discriminatory patent license (a) in connection with copies of the covered +work conveyed by you (or copies made from those copies), or (b) primarily for +and in connection with specific products or compilations that contain the +covered work, unless you entered into that arrangement, or that patent +license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting any +implied license or other defenses to infringement that may otherwise be +available to you under applicable patent law. + +12. No Surrender of Others' Freedom. +If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not excuse +you from the conditions of this License. If you cannot convey a covered work +so as to satisfy simultaneously your obligations under this License and any +other pertinent obligations, then as a consequence you may not convey it at +all. For example, if you agree to terms that obligate you to collect a +royalty for further conveying from those to whom you convey the Program, the +only way you could satisfy both those terms and this License would be to +refrain entirely from conveying the Program. + +13. Remote Network Interaction; Use with the GNU General Public License. 
+Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users interacting +with it remotely through a computer network (if your version supports such +interaction) an opportunity to receive the Corresponding Source of your +version by providing access to the Corresponding Source from a network server +at no charge, through some standard or customary means of facilitating +copying of software. This Corresponding Source shall include the +Corresponding Source for any work covered by version 3 of the GNU General +Public License that is incorporated pursuant to the following paragraph. + +Notwithstanding any other provision of this License, you have permission to +link or combine any covered work with a work licensed under version 3 of the +GNU General Public License into a single combined work, and to convey the +resulting work. The terms of this License will continue to apply to the part +which is the covered work, but the work with which it is combined will remain +governed by version 3 of the GNU General Public License. + +14. Revised Versions of this License. +The Free Software Foundation may publish revised and/or new versions of the +GNU Affero General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies that a certain numbered version of the GNU Affero General Public +License "or any later version" applies to it, you have the option of +following the terms and conditions either of that numbered version or of any +later version published by the Free Software Foundation. If the Program does +not specify a version number of the GNU Affero General Public License, you +may choose any version ever published by the Free Software Foundation. 
+ +If the Program specifies that a proxy can decide which future versions of the +GNU Affero General Public License can be used, that proxy's public statement +of acceptance of a version permanently authorizes you to choose that version +for the Program. + +Later license versions may give you additional or different permissions. +However, no additional obligations are imposed on any author or copyright +holder as a result of your choosing to follow a later version. + +15. Disclaimer of Warranty. +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE +LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, +EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE +ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. +SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY +SERVICING, REPAIR OR CORRECTION. + +16. Limitation of Liability. +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL +ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE +PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE +OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR +DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR +A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH +HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +17. Interpretation of Sections 15 and 16. 
+If the disclaimer of warranty and limitation of liability provided above +cannot be given local legal effect according to their terms, reviewing courts +shall apply local law that most closely approximates an absolute waiver of +all civil liability in connection with the Program, unless a warranty or +assumption of liability accompanies a copy of the Program in return for a +fee. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs +If you develop a new program, and you want it to be of the greatest possible +use to the public, the best way to achieve this is to make it free software +which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach +them to the start of each source file to most effectively state the exclusion +of warranty; and each file should have at least the "copyright" line and a +pointer to where the full notice is found. + +SpacetimeDB: A database which replaces your server. +Copyright (C) 2023 Clockwork Laboratories, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. +Also add information on how to contact you by electronic and paper mail. + +If your software can interact with users remotely through a computer network, +you should also make sure that it provides a way for users to get its source. 
+For example, if your program is a web application, its interface could +display a "Source" link that leads users to an archive of the code. There are +many ways you could offer source, and different solutions will be better for +different programs; see section 13 for the specific requirements. + +You should also get your employer (if you work as a programmer) or school, if +any, to sign a "copyright disclaimer" for the program, if necessary. For more +information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs new file mode 100644 index 00000000000..8721ddc89a2 --- /dev/null +++ b/crates/runtime/src/lib.rs @@ -0,0 +1,100 @@ +//! Runtime and deterministic simulation utilities shared by core and DST. + +use std::{fmt, future::Future, time::Duration}; + +#[cfg(feature = "simulation")] +pub mod sim; + +#[cfg(feature = "tokio")] +pub type Handle = tokio::runtime::Handle; +#[cfg(feature = "tokio")] +pub type Runtime = tokio::runtime::Runtime; + +#[derive(Clone)] +pub enum RuntimeDispatch { + #[cfg(feature = "tokio")] + Tokio(Handle), + #[cfg(feature = "simulation")] + Simulation(sim::Handle), +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct RuntimeTimeout; + +impl fmt::Display for RuntimeTimeout { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("runtime operation timed out") + } +} + +impl std::error::Error for RuntimeTimeout {} + +impl RuntimeDispatch { + #[cfg(feature = "tokio")] + pub fn tokio(handle: Handle) -> Self { + Self::Tokio(handle) + } + + #[cfg(feature = "tokio")] + pub fn tokio_current() -> Self { + Self::tokio(Handle::current()) + } + + #[cfg(feature = "simulation")] + pub fn simulation(handle: sim::Handle) -> Self { + Self::Simulation(handle) + } + + #[cfg(feature = "simulation")] + pub fn simulation_current() -> Self { + Self::simulation(sim::Handle::current().expect("simulation runtime is not active on this thread")) + } + + pub fn 
spawn(&self, future: impl Future + Send + 'static) { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = future; + match self { + #[cfg(feature = "tokio")] + Self::Tokio(handle) => { + handle.spawn(future); + } + #[cfg(feature = "simulation")] + Self::Simulation(handle) => { + handle.spawn_on(sim::NodeId::MAIN, future).detach(); + } + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + _ => unreachable!("runtime dispatch has no enabled backend"), + } + } + + pub async fn timeout( + &self, + timeout_after: Duration, + future: impl Future, + ) -> Result { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = (timeout_after, future); + match self { + #[cfg(feature = "tokio")] + Self::Tokio(_) => tokio::time::timeout(timeout_after, future) + .await + .map_err(|_| RuntimeTimeout), + #[cfg(feature = "simulation")] + Self::Simulation(_) => sim::time::timeout(timeout_after, future) + .await + .map_err(|_| RuntimeTimeout), + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + _ => unreachable!("runtime dispatch has no enabled backend"), + } + } +} + +#[cfg(feature = "tokio")] +pub fn current_handle_or_new_runtime() -> anyhow::Result<(Handle, Option)> { + if let Ok(handle) = Handle::try_current() { + return Ok((handle, None)); + } + + let runtime = Runtime::new()?; + Ok((runtime.handle().clone(), Some(runtime))) +} diff --git a/crates/dst/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs similarity index 80% rename from crates/dst/src/sim/executor.rs rename to crates/runtime/src/sim/executor.rs index 394a56829fe..765b70f631b 100644 --- a/crates/dst/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -1,6 +1,7 @@ //! Minimal asynchronous executor adapted from madsim's `sim/task` loop. 
use std::{ + cell::RefCell, collections::BTreeMap, fmt, future::Future, @@ -17,12 +18,11 @@ use std::{ use futures_util::FutureExt; -use crate::{ - seed::DstSeed, - sim::rng::{enter_rng_context, DeterminismLog}, - sim::system_thread::enter_simulation_thread, - sim::time::{enter_time_context, TimeHandle}, - sim::Rng, +use crate::sim::{ + rng::{enter_rng_context, DeterminismLog}, + system_thread::enter_simulation_thread, + time::{enter_time_context, TimeHandle}, + Rng, }; type Runnable = async_task::Runnable; @@ -51,13 +51,14 @@ pub struct Runtime { } impl Runtime { - pub fn new(seed: DstSeed) -> anyhow::Result { + pub fn new(seed: u64) -> anyhow::Result { Ok(Self { executor: Arc::new(Executor::new(seed)), }) } pub fn block_on(&mut self, future: F) -> F::Output { + let _handle_context = enter_handle_context(self.handle()); self.executor.block_on(future) } @@ -92,7 +93,7 @@ impl Runtime { } /// Run a future twice with the same seed and fail if simulator choices diverge. - pub fn check_determinism(seed: DstSeed, make_future: fn() -> F) -> F::Output + pub fn check_determinism(seed: u64, make_future: fn() -> F) -> F::Output where F: Future + 'static, F::Output: Send + 'static, @@ -101,7 +102,7 @@ impl Runtime { } /// Run a future twice with the same seed and fail if simulator choices diverge. 
- pub fn check_determinism_with(seed: DstSeed, make_future: M) -> F::Output + pub fn check_determinism_with(seed: u64, make_future: M) -> F::Output where M: Fn() -> F + Clone + Send + 'static, F: Future + 'static, @@ -109,7 +110,7 @@ impl Runtime { { let first = make_future.clone(); let log = thread::spawn(move || { - let mut runtime = Runtime::new(seed).expect("failed to create DST runtime"); + let mut runtime = Runtime::new(seed).expect("failed to create simulation runtime"); runtime.executor.enable_determinism_log(); runtime.block_on(first()); runtime @@ -122,7 +123,7 @@ impl Runtime { .unwrap(); thread::spawn(move || { - let mut runtime = Runtime::new(seed).expect("failed to create DST runtime"); + let mut runtime = Runtime::new(seed).expect("failed to create simulation runtime"); runtime.executor.enable_determinism_check(log); let output = runtime.block_on(make_future()); runtime @@ -144,6 +145,10 @@ pub struct Handle { } impl Handle { + pub fn current() -> Option { + current_handle() + } + pub fn create_node(&self) -> NodeId { self.executor.create_node() } @@ -163,6 +168,39 @@ impl Handle { { self.executor.spawn_on(node, future) } + + pub fn spawn_local_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + 'static, + F::Output: 'static, + { + self.executor.spawn_local_on(node, future) + } +} + +thread_local! { + static CURRENT_HANDLE: RefCell> = RefCell::new(None); +} + +pub(crate) fn current_handle() -> Option { + CURRENT_HANDLE.with(|handle| handle.borrow().clone()) +} + +fn enter_handle_context(handle: Handle) -> HandleContextGuard { + let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); + HandleContextGuard { previous } +} + +struct HandleContextGuard { + previous: Option, +} + +impl Drop for HandleContextGuard { + fn drop(&mut self) { + CURRENT_HANDLE.with(|slot| { + *slot.borrow_mut() = self.previous.take(); + }); + } } /// A spawned simulated task. 
@@ -184,8 +222,8 @@ impl Future for JoinHandle { } } -fn panic_with_seed(seed: DstSeed, payload: Box) -> ! { - eprintln!("note: run with --seed {} to reproduce this error", seed.0); +fn panic_with_seed(seed: u64, payload: Box) -> ! { + eprintln!("note: run with --seed {seed} to reproduce this error"); std::panic::resume_unwind(payload); } @@ -199,7 +237,7 @@ struct Executor { } impl Executor { - fn new(seed: DstSeed) -> Self { + fn new(seed: u64) -> Self { let queue = Queue::new(); let mut nodes = BTreeMap::new(); nodes.insert(NodeId::MAIN, Arc::new(NodeState::default())); @@ -272,6 +310,24 @@ impl Executor { JoinHandle { task } } + fn spawn_local_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + 'static, + F::Output: 'static, + { + self.node_state(node); + + let sender = self.sender.clone(); + let (runnable, task) = unsafe { + async_task::Builder::new() + .metadata(node) + .spawn_unchecked(move |_| future, move |runnable| sender.send(runnable)) + }; + runnable.schedule(); + + JoinHandle { task } + } + #[track_caller] fn block_on(&self, future: F) -> F::Output { let _system_thread_context = enter_simulation_thread(); @@ -448,7 +504,7 @@ mod tests { #[test] fn paused_node_does_not_run_until_resumed() { - let mut runtime = Runtime::new(DstSeed(1)).unwrap(); + let mut runtime = Runtime::new(1).unwrap(); let node = runtime.create_node(); runtime.pause(node); @@ -471,7 +527,7 @@ mod tests { #[test] fn handle_can_spawn_onto_node_from_simulated_task() { - let mut runtime = Runtime::new(DstSeed(2)).unwrap(); + let mut runtime = Runtime::new(2).unwrap(); let handle = runtime.handle(); let value = runtime.block_on(async move { @@ -482,12 +538,33 @@ mod tests { assert_eq!(value, 11); } + #[test] + fn current_handle_can_spawn_local_task_inside_runtime() { + assert!(Handle::current().is_none()); + + let mut runtime = Runtime::new(5).unwrap(); + let value = runtime.block_on(async { + let handle = Handle::current().expect("sim handle should be present inside 
block_on"); + let node = handle.create_node(); + let captured = std::rc::Rc::new(17); + handle + .spawn_local_on(node, async move { + yield_now().await; + *captured + }) + .await + }); + + assert_eq!(value, 17); + assert!(Handle::current().is_none()); + } + #[test] fn check_determinism_runs_future_twice() { static CALLS: AtomicUsize = AtomicUsize::new(0); CALLS.store(0, Ordering::SeqCst); - let value = Runtime::check_determinism(DstSeed(3), || async { + let value = Runtime::check_determinism(3, || async { CALLS.fetch_add(1, Ordering::SeqCst); yield_now().await; 13 @@ -503,7 +580,7 @@ mod tests { static FIRST_RUN: AtomicBool = AtomicBool::new(true); FIRST_RUN.store(true, Ordering::SeqCst); - Runtime::check_determinism(DstSeed(4), || async { + Runtime::check_determinism(4, || async { if FIRST_RUN.swap(false, Ordering::SeqCst) { yield_now().await; } diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs new file mode 100644 index 00000000000..467903cf2b4 --- /dev/null +++ b/crates/runtime/src/sim/mod.rs @@ -0,0 +1,23 @@ +//! Local deterministic simulation runtime. +//! +//! This module is deliberately small, but its executor shape follows madsim's: +//! futures are scheduled as runnable tasks and the ready queue is sampled by a +//! deterministic RNG instead of being driven by a package-level async runtime. 
+ +mod executor; +mod rng; +mod system_thread; +pub mod time; + +use std::time::Duration; + +pub use executor::{yield_now, Handle, JoinHandle, NodeId, Runtime}; +pub use rng::{DecisionSource, Rng}; + +pub fn advance_time(duration: Duration) { + time::advance(duration); +} + +pub fn decision_source(seed: u64) -> DecisionSource { + DecisionSource::new(seed) +} diff --git a/crates/dst/src/sim/rng.rs b/crates/runtime/src/sim/rng.rs similarity index 91% rename from crates/dst/src/sim/rng.rs rename to crates/runtime/src/sim/rng.rs index 1b59d0cffe9..09afde03031 100644 --- a/crates/dst/src/sim/rng.rs +++ b/crates/runtime/src/sim/rng.rs @@ -7,8 +7,6 @@ use std::{ }, }; -use crate::seed::DstSeed; - const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; #[derive(Clone, Debug)] @@ -20,14 +18,14 @@ pub struct Rng { } impl Rng { - pub fn new(seed: DstSeed) -> Self { + pub fn new(seed: u64) -> Self { unsafe { getentropy(ptr::null_mut(), 0) }; - if !init_std_random_state(seed.0) { + if !init_std_random_state(seed) { tracing::warn!("failed to initialize std random state, std HashMap will not be deterministic"); } Self { - seed: seed.0, - state: splitmix64(seed.0), + seed, + state: splitmix64(seed), log: None, check: None, } @@ -111,23 +109,23 @@ impl Rng { pub(crate) struct DeterminismLog(Vec); #[derive(Debug)] -pub(crate) struct DecisionSource { +pub struct DecisionSource { state: AtomicU64, } impl DecisionSource { - pub(crate) fn new(seed: DstSeed) -> Self { + pub fn new(seed: u64) -> Self { Self { - state: AtomicU64::new(splitmix64(seed.0)), + state: AtomicU64::new(splitmix64(seed)), } } - pub(crate) fn sample_probability(&self, probability: f64) -> bool { + pub fn sample_probability(&self, probability: f64) -> bool { probability_sample(self.next_u64(), probability) } fn next_u64(&self) -> u64 { - let state = self.state.fetch_add(GAMMA, Ordering::Relaxed); + let state = self.state.fetch_add(GAMMA, Ordering::Relaxed).wrapping_add(GAMMA); splitmix64(state) } } @@ -297,12 +295,12 @@ mod tests 
{ #[test] fn rng_log_check_accepts_same_sequence() { - let mut first = Rng::new(DstSeed(10)); + let mut first = Rng::new(10); first.enable_determinism_log(); let first_values = (0..8).map(|_| first.next_u64()).collect::>(); let log = first.take_determinism_log().unwrap(); - let mut second = Rng::new(DstSeed(10)); + let mut second = Rng::new(10); second.enable_determinism_check(log); let second_values = (0..8).map(|_| second.next_u64()).collect::>(); second.finish_determinism_check().unwrap(); @@ -310,22 +308,32 @@ mod tests { assert_eq!(first_values, second_values); } + #[test] + fn decision_source_matches_rng_sequence() { + let source = DecisionSource::new(12); + let mut rng = Rng::new(12); + + for _ in 0..16 { + assert_eq!(source.next_u64(), rng.next_u64()); + } + } + #[test] #[should_panic(expected = "non-determinism detected")] fn rng_log_check_rejects_different_sequence() { - let mut first = Rng::new(DstSeed(10)); + let mut first = Rng::new(10); first.enable_determinism_log(); first.next_u64(); let log = first.take_determinism_log().unwrap(); - let mut second = Rng::new(DstSeed(11)); + let mut second = Rng::new(11); second.enable_determinism_check(log); second.next_u64(); } #[test] fn getentropy_uses_current_sim_rng() { - let rng = Arc::new(Mutex::new(Rng::new(DstSeed(20)))); + let rng = Arc::new(Mutex::new(Rng::new(20))); let _guard = enter_rng_context(Arc::clone(&rng)); let mut actual = [0u8; 24]; @@ -333,7 +341,7 @@ mod tests { assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); } - let mut expected_rng = Rng::new(DstSeed(20)); + let mut expected_rng = Rng::new(20); let mut expected = [0u8; 24]; expected_rng.fill_bytes(&mut expected); assert_eq!(actual, expected); @@ -341,7 +349,7 @@ mod tests { #[test] fn std_hashmap_order_is_seeded_for_runtime_thread() { - fn order_for(seed: DstSeed) -> Vec<(u64, u64)> { + fn order_for(seed: u64) -> Vec<(u64, u64)> { std::thread::spawn(move || { let _rng = Rng::new(seed); (0..12) @@ -354,6 +362,6 @@ mod tests { 
.unwrap() } - assert_eq!(order_for(DstSeed(30)), order_for(DstSeed(30))); + assert_eq!(order_for(30), order_for(30)); } } diff --git a/crates/dst/src/sim/system_thread.rs b/crates/runtime/src/sim/system_thread.rs similarity index 94% rename from crates/dst/src/sim/system_thread.rs rename to crates/runtime/src/sim/system_thread.rs index 9bb3e612d7b..f395a25442a 100644 --- a/crates/dst/src/sim/system_thread.rs +++ b/crates/runtime/src/sim/system_thread.rs @@ -50,12 +50,12 @@ unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc: #[cfg(test)] mod tests { - use crate::{seed::DstSeed, sim}; + use crate::sim; #[test] #[cfg(unix)] fn runtime_forbids_system_thread_spawn() { - let mut runtime = sim::Runtime::new(DstSeed(200)).unwrap(); + let mut runtime = sim::Runtime::new(200).unwrap(); runtime.block_on(async { let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); assert!(result.is_err()); diff --git a/crates/runtime/src/sim/time.rs b/crates/runtime/src/sim/time.rs new file mode 100644 index 00000000000..2508b35b249 --- /dev/null +++ b/crates/runtime/src/sim/time.rs @@ -0,0 +1,343 @@ +//! Virtual time for the local simulation runtime. 
+ +use std::{ + cell::RefCell, + collections::BTreeMap, + fmt, + future::Future, + pin::Pin, + sync::{Arc, Mutex}, + task::{Context, Poll, Waker}, + time::Duration, +}; + +use futures::future::{select, Either}; + +#[derive(Clone, Debug)] +pub struct TimeHandle { + inner: Arc>, +} + +impl TimeHandle { + pub fn new() -> Self { + Self { + inner: Arc::new(Mutex::new(TimeState::default())), + } + } + + pub fn now(&self) -> Duration { + self.inner.lock().expect("sim time poisoned").now + } + + pub fn advance(&self, duration: Duration) { + if duration.is_zero() { + return; + } + + let wakers = { + let mut state = self.inner.lock().expect("sim time poisoned"); + state.now = state.now.saturating_add(duration); + state.take_due_wakers() + }; + wake_all(wakers); + } + + pub fn wake_next_timer(&self) -> bool { + let wakers = { + let mut state = self.inner.lock().expect("sim time poisoned"); + let Some(next_deadline) = state.timers.values().map(|timer| timer.deadline).min() else { + return false; + }; + if next_deadline > state.now { + state.now = next_deadline; + } + state.take_due_wakers() + }; + let woke = !wakers.is_empty(); + wake_all(wakers); + woke + } + + fn register_timer(&self, id: TimerId, deadline: Duration, waker: &Waker) { + let mut state = self.inner.lock().expect("sim time poisoned"); + state.timers.insert( + id, + TimerEntry { + deadline, + waker: waker.clone(), + }, + ); + } + + fn cancel_timer(&self, id: TimerId) { + self.inner.lock().expect("sim time poisoned").timers.remove(&id); + } + + fn next_timer_id(&self) -> TimerId { + let mut state = self.inner.lock().expect("sim time poisoned"); + let id = TimerId(state.next_timer_id); + state.next_timer_id = state.next_timer_id.saturating_add(1); + id + } +} + +impl Default for TimeHandle { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug, Default)] +struct TimeState { + now: Duration, + next_timer_id: u64, + timers: BTreeMap, +} + +impl TimeState { + fn take_due_wakers(&mut self) -> Vec { + let due 
= self + .timers + .iter() + .filter_map(|(id, timer)| (timer.deadline <= self.now).then_some(*id)) + .collect::>(); + due.into_iter() + .filter_map(|id| self.timers.remove(&id).map(|timer| timer.waker)) + .collect() + } +} + +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +struct TimerId(u64); + +#[derive(Debug)] +struct TimerEntry { + deadline: Duration, + waker: Waker, +} + +thread_local! { + static CURRENT_TIME: RefCell> = const { RefCell::new(None) }; +} + +pub struct TimeContextGuard { + previous: Option, +} + +pub fn enter_time_context(handle: TimeHandle) -> TimeContextGuard { + let previous = CURRENT_TIME.with(|current| current.replace(Some(handle))); + TimeContextGuard { previous } +} + +pub fn try_current_handle() -> Option { + CURRENT_TIME.with(|current| current.borrow().clone()) +} + +pub fn now() -> Duration { + try_current_handle().map(|handle| handle.now()).unwrap_or_default() +} + +pub fn advance(duration: Duration) { + if let Some(handle) = try_current_handle() { + handle.advance(duration); + } +} + +pub fn sleep(duration: Duration) -> Sleep { + Sleep { + duration, + state: SleepState::Unregistered, + } +} + +pub async fn timeout(duration: Duration, future: impl Future) -> Result { + futures::pin_mut!(future); + let sleep = sleep(duration); + futures::pin_mut!(sleep); + + match select(future, sleep).await { + Either::Left((output, _)) => Ok(output), + Either::Right(((), _)) => Err(TimeoutElapsed { duration }), + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct TimeoutElapsed { + duration: Duration, +} + +impl TimeoutElapsed { + pub fn duration(self) -> Duration { + self.duration + } +} + +impl fmt::Display for TimeoutElapsed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "simulated timeout elapsed after {:?}", self.duration) + } +} + +impl std::error::Error for TimeoutElapsed {} + +impl Drop for TimeContextGuard { + fn drop(&mut self) { + CURRENT_TIME.with(|current| { + 
current.replace(self.previous.take()); + }); + } +} + +pub struct Sleep { + duration: Duration, + state: SleepState, +} + +enum SleepState { + Unregistered, + Registered { + handle: TimeHandle, + id: TimerId, + deadline: Duration, + }, + Done, +} + +impl Future for Sleep { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if matches!(self.state, SleepState::Done) { + return Poll::Ready(()); + } + + if matches!(self.state, SleepState::Unregistered) { + let handle = try_current_handle().expect("sim::time::sleep polled outside sim runtime"); + let deadline = handle.now().saturating_add(self.duration); + let id = handle.next_timer_id(); + self.state = SleepState::Registered { handle, id, deadline }; + } + + let SleepState::Registered { handle, id, deadline } = &self.state else { + unreachable!("sleep state should be registered or done"); + }; + + if handle.now() >= *deadline { + let handle = handle.clone(); + let id = *id; + handle.cancel_timer(id); + self.state = SleepState::Done; + Poll::Ready(()) + } else { + handle.register_timer(*id, *deadline, cx.waker()); + Poll::Pending + } + } +} + +impl Drop for Sleep { + fn drop(&mut self) { + if let SleepState::Registered { handle, id, .. 
} = &self.state { + handle.cancel_timer(*id); + } + } +} + +fn wake_all(wakers: Vec) { + for waker in wakers { + waker.wake(); + } +} + +#[cfg(test)] +mod tests { + use std::{ + sync::{Arc, Mutex}, + time::Duration, + }; + + use crate::sim; + + #[test] + fn sleep_fast_forwards_virtual_time() { + let mut runtime = sim::Runtime::new(101).unwrap(); + + runtime.block_on(async { + assert_eq!(super::now(), Duration::ZERO); + super::sleep(Duration::from_millis(5)).await; + assert_eq!(super::now(), Duration::from_millis(5)); + }); + } + + #[test] + fn shorter_timer_wakes_first() { + let mut runtime = sim::Runtime::new(102).unwrap(); + let handle = runtime.handle(); + let order = Arc::new(Mutex::new(Vec::new())); + + runtime.block_on({ + let order = Arc::clone(&order); + async move { + let slow_order = Arc::clone(&order); + let slow = handle.spawn_on(sim::NodeId::MAIN, async move { + super::sleep(Duration::from_millis(10)).await; + slow_order.lock().expect("order poisoned").push(10); + }); + + let fast_order = Arc::clone(&order); + let fast = handle.spawn_on(sim::NodeId::MAIN, async move { + super::sleep(Duration::from_millis(3)).await; + fast_order.lock().expect("order poisoned").push(3); + }); + + fast.await; + slow.await; + } + }); + + assert_eq!(*order.lock().expect("order poisoned"), vec![3, 10]); + assert_eq!(runtime.elapsed(), Duration::from_millis(10)); + } + + #[test] + fn explicit_advance_moves_virtual_time() { + let mut runtime = sim::Runtime::new(103).unwrap(); + + runtime.block_on(async { + super::advance(Duration::from_millis(7)); + assert_eq!(super::now(), Duration::from_millis(7)); + }); + } + + #[test] + fn timeout_returns_future_output_before_deadline() { + let mut runtime = sim::Runtime::new(104).unwrap(); + + let output = runtime.block_on(async { + super::timeout(Duration::from_millis(10), async { + super::sleep(Duration::from_millis(3)).await; + 9 + }) + .await + }); + + assert_eq!(output, Ok(9)); + assert_eq!(runtime.elapsed(), 
Duration::from_millis(3)); + } + + #[test] + fn timeout_expires_at_virtual_deadline() { + let mut runtime = sim::Runtime::new(105).unwrap(); + + let output = runtime.block_on(async { + super::timeout(Duration::from_millis(4), async { + super::sleep(Duration::from_millis(20)).await; + 9 + }) + .await + }); + + assert_eq!(output.unwrap_err().duration(), Duration::from_millis(4)); + assert_eq!(runtime.elapsed(), Duration::from_millis(4)); + } +} diff --git a/tools/ci/src/main.rs b/tools/ci/src/main.rs index 80f9c95b1a7..80abbbd880c 100644 --- a/tools/ci/src/main.rs +++ b/tools/ci/src/main.rs @@ -278,7 +278,7 @@ enum CiCmd { TypescriptTest, /// Builds the docs site. Docs, - /// Checks that core database crates use SpacetimeDB fs/net IO boundaries. + /// Checks that runtime is not used as a Tokio-shaped IO facade. IoBoundary, } @@ -312,9 +312,9 @@ fn check_io_boundary() -> Result<()> { ensure_repo_root()?; let mut violations = Vec::new(); - for root in ["crates/datastore", "crates/core"] { + for root in ["crates/runtime", "crates/datastore", "crates/core", "crates/commitlog"] { for path in tracked_rs_files_under(root)? 
{ - check_file_for_direct_tokio_fs_net(&path, &mut violations)?; + check_file_for_runtime_io_facade(&path, &mut violations)?; } } @@ -326,38 +326,59 @@ fn check_io_boundary() -> Result<()> { eprintln!("{violation}"); } bail!( - "direct tokio::fs/tokio::net usage is forbidden in crates/datastore and crates/core; use spacetimedb_io::{{fs, net}}" + "spacetimedb_runtime must not be used as a Tokio-shaped io/fs/net facade; use Tokio directly in normal-only code and semantic seams for simulation code" ); } -fn check_file_for_direct_tokio_fs_net(path: &Path, violations: &mut Vec) -> Result<()> { +fn check_file_for_runtime_io_facade(path: &Path, violations: &mut Vec) -> Result<()> { let contents = fs::read_to_string(path)?; - let mut in_tokio_use_tree = false; + let mut in_runtime_use_tree = false; for (line_idx, line) in contents.lines().enumerate() { let line_no = line_idx + 1; let code = line.split("//").next().unwrap_or(line); - if code.contains("tokio::fs") || code.contains("tokio::net") { - violations.push(format!("{}:{line_no}: direct tokio fs/net path", path.display())); + for module in ["io", "fs", "net", "blocking_fs"] { + if code.contains(&format!("spacetimedb_runtime::{module}")) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade usage", + path.display() + )); + } + if path == Path::new("crates/runtime/src/lib.rs") && code.contains(&format!("pub mod {module}")) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade export", + path.display() + )); + } } - if in_tokio_use_tree { - if tokio_use_tree_mentions_fs_or_net(code) { - violations.push(format!("{}:{line_no}: direct tokio fs/net import", path.display())); + if in_runtime_use_tree { + for module in ["io", "fs", "net", "blocking_fs"] { + if use_tree_mentions_token(code, module) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade import", + path.display() + )); + } } if code.contains("};") { - in_tokio_use_tree = false; 
+ in_runtime_use_tree = false; } continue; } - if code.contains("use tokio::{") { - if tokio_use_tree_mentions_fs_or_net(code) { - violations.push(format!("{}:{line_no}: direct tokio fs/net import", path.display())); + if code.contains("use spacetimedb_runtime::{") { + for module in ["io", "fs", "net", "blocking_fs"] { + if use_tree_mentions_token(code, module) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade import", + path.display() + )); + } } if !code.contains("};") { - in_tokio_use_tree = true; + in_runtime_use_tree = true; } } } @@ -365,19 +386,19 @@ fn check_file_for_direct_tokio_fs_net(path: &Path, violations: &mut Vec) Ok(()) } -fn tokio_use_tree_mentions_fs_or_net(code: &str) -> bool { +fn use_tree_mentions_token(code: &str, forbidden: &str) -> bool { let mut token = String::new(); for ch in code.chars() { if ch == '_' || ch.is_ascii_alphanumeric() { token.push(ch); continue; } - if token == "fs" || token == "net" { + if token == forbidden { return true; } token.clear(); } - token == "fs" || token == "net" + token == forbidden } fn run_dlls() -> Result<()> { From 37aa55b7dc9aaad26165cb81deb961e1ec941390 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 6 May 2026 14:55:05 +0530 Subject: [PATCH 30/74] snapshots sim --- Cargo.lock | 3 + crates/core/src/db/persistence.rs | 13 +- crates/core/src/db/relational_db.rs | 24 +- .../subscription/module_subscription_actor.rs | 1 + .../src/locking_tx_datastore/datastore.rs | 54 +++ crates/dst/Cargo.toml | 3 + crates/dst/src/client.rs | 44 -- crates/dst/src/properties.rs | 9 +- crates/dst/src/properties/rules.rs | 96 ++++- crates/dst/src/properties/runtime.rs | 272 +++++------- crates/dst/src/sim/commitlog.rs | 396 +++--------------- crates/dst/src/sim/mod.rs | 2 + crates/dst/src/sim/snapshot.rs | 194 +++++++++ crates/dst/src/sim/storage_faults.rs | 320 ++++++++++++++ crates/dst/src/targets/descriptor.rs | 17 +- .../src/targets/relational_db_commitlog.rs | 293 +++++++++++-- 
.../src/workload/commitlog_ops/generation.rs | 7 + crates/dst/src/workload/commitlog_ops/mod.rs | 3 +- .../dst/src/workload/commitlog_ops/types.rs | 26 ++ crates/dst/src/workload/strategy.rs | 40 -- .../table_ops/scenarios/random_crud.rs | 4 +- crates/dst/src/workload/table_ops/types.rs | 49 +-- crates/snapshot/tests/remote.rs | 1 + 23 files changed, 1206 insertions(+), 665 deletions(-) create mode 100644 crates/dst/src/sim/snapshot.rs create mode 100644 crates/dst/src/sim/storage_faults.rs diff --git a/Cargo.lock b/Cargo.lock index cf40c3e9845..651d10e7eec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8317,11 +8317,14 @@ dependencies = [ "spacetimedb-datastore", "spacetimedb-durability", "spacetimedb-lib 2.2.0", + "spacetimedb-paths", "spacetimedb-primitives 2.2.0", "spacetimedb-runtime", "spacetimedb-sats 2.2.0", "spacetimedb-schema", + "spacetimedb-snapshot", "spacetimedb-table", + "tempfile", "tracing", "tracing-subscriber", ] diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index 7eaabe64a7b..a2f6711d98f 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -35,6 +35,12 @@ pub struct Persistence { /// Currently the expectation is that the reported size is the commitlog /// size only. pub disk_size: DiskSizeFn, + /// An optional [SnapshotRepository] used when restoring from snapshots. + /// + /// This is separate from [SnapshotWorker] so deterministic simulation + /// targets can use synchronous snapshot creation without starting the + /// Tokio-backed worker. + pub snapshot_repo: Option>, /// An optional [SnapshotWorker]. /// /// The current expectation is that snapshots are only enabled for @@ -65,6 +71,7 @@ impl Persistence { Self { durability: Arc::new(durability), disk_size: Arc::new(disk_size), + snapshot_repo: None, snapshots, runtime, } @@ -72,7 +79,9 @@ impl Persistence { /// If snapshots are enabled, get the [SnapshotRepository] they are stored in. 
pub fn snapshot_repo(&self) -> Option<&SnapshotRepository> { - self.snapshots.as_ref().map(|worker| worker.repo()) + self.snapshot_repo + .as_deref() + .or_else(|| self.snapshots.as_ref().map(|worker| worker.repo())) } /// Get the [TxOffset] reported as durable by the [Durability] impl. @@ -106,6 +115,7 @@ impl Persistence { |Self { durability, disk_size, + snapshot_repo: _, snapshots, runtime, }| (Some(durability), Some(disk_size), snapshots, Some(runtime)), @@ -170,6 +180,7 @@ impl PersistenceProvider for LocalPersistenceProvider { Ok(Persistence { durability, disk_size, + snapshot_repo: None, snapshots: Some(snapshot_worker), runtime: RuntimeDispatch::tokio_current(), }) diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 04e4d56cb0b..43bc70a63c6 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -41,7 +41,7 @@ use spacetimedb_lib::db::raw_def::v9::{btree, RawModuleDefV9Builder, RawSql}; use spacetimedb_lib::st_var::StVarValue; use spacetimedb_lib::ConnectionId; use spacetimedb_lib::Identity; -use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; +use spacetimedb_paths::server::{ReplicaDir, SnapshotDirPath, SnapshotsPath}; use spacetimedb_primitives::*; use spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::raw_identifier::RawIdentifier; @@ -773,6 +773,18 @@ impl RelationalDB { r } + #[tracing::instrument(level = "trace", skip_all)] + pub fn try_begin_mut_tx(&self, isolation_level: IsolationLevel, workload: Workload) -> Option { + log::trace!("TRY BEGIN MUT TX"); + let r = self.inner.try_begin_mut_tx(isolation_level, workload); + if r.is_some() { + log::trace!("ACQUIRED MUT TX"); + } else { + log::trace!("MUT TX CONTENDED"); + } + r + } + #[tracing::instrument(level = "trace", skip_all)] pub fn begin_tx(&self, workload: Workload) -> Tx { log::trace!("BEGIN TX"); @@ -883,6 +895,14 @@ impl RelationalDB { self.snapshot_worker.as_ref().map(|snap| 
snap.subscribe()) } + /// Capture a snapshot synchronously into `repo`. + /// + /// This is primarily used by deterministic tests which cannot use the + /// Tokio-backed [`SnapshotWorker`]. + pub fn take_snapshot(&self, repo: &SnapshotRepository) -> Result, DBError> { + Ok(self.inner.take_snapshot(repo)?) + } + /// Run a fallible function in a transaction. /// /// If the supplied function returns `Ok`, the transaction is automatically @@ -1939,6 +1959,7 @@ pub mod tests_utils { let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, + snapshot_repo: None, snapshots, runtime: RuntimeDispatch::tokio(rt), }; @@ -2060,6 +2081,7 @@ pub mod tests_utils { let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, + snapshot_repo: None, snapshots, runtime: RuntimeDispatch::tokio(rt), }; diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index 14c28f32f46..7be46b25f13 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2045,6 +2045,7 @@ mod tests { Some(Persistence { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), + snapshot_repo: None, snapshots: None, runtime: crate::runtime::RuntimeDispatch::tokio(rt), }), diff --git a/crates/datastore/src/locking_tx_datastore/datastore.rs b/crates/datastore/src/locking_tx_datastore/datastore.rs index edcce91ce5e..0c5e7655d43 100644 --- a/crates/datastore/src/locking_tx_datastore/datastore.rs +++ b/crates/datastore/src/locking_tx_datastore/datastore.rs @@ -924,6 +924,28 @@ impl MutTx for Locking { } impl Locking { + pub fn try_begin_mut_tx(&self, _isolation_level: IsolationLevel, workload: Workload) -> Option { + let metrics = ExecutionMetrics::default(); + let ctx = ExecutionContext::with_workload(self.database_identity, workload); + + let timer = Instant::now(); + let 
committed_state_write_lock = self.committed_state.try_write_arc()?; + let sequence_state_lock = self.sequence_state.try_lock_arc()?; + let lock_wait_time = timer.elapsed(); + + Some(MutTxId { + committed_state_write_lock, + sequence_state_lock, + tx_state: TxState::default(), + lock_wait_time, + read_sets: <_>::default(), + timer, + ctx, + metrics, + _not_send: std::marker::PhantomData, + }) + } + pub fn rollback_mut_tx_downgrade(&self, tx: MutTxId, workload: Workload) -> (TxMetrics, TxId) { tx.rollback_downgrade(workload) } @@ -2802,6 +2824,38 @@ pub(crate) mod tests { Ok(()) } + #[test] + fn test_try_begin_mut_tx_reports_writer_contention() -> ResultTest<()> { + let datastore = get_datastore()?; + let tx = begin_mut_tx(&datastore); + assert!(datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .is_none()); + let _ = datastore.rollback_mut_tx(tx); + + let tx = datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .expect("write lock should be available after rollback"); + let _ = datastore.rollback_mut_tx(tx); + Ok(()) + } + + #[test] + fn test_try_begin_mut_tx_reports_read_contention() -> ResultTest<()> { + let datastore = get_datastore()?; + let tx = begin_tx(&datastore); + assert!(datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .is_none()); + let _ = datastore.release_tx(tx); + + let tx = datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .expect("write lock should be available after read release"); + let _ = datastore.rollback_mut_tx(tx); + Ok(()) + } + #[test] fn test_scheduled_table_insert_and_update() -> ResultTest<()> { // Build the minimal schema that is a valid scheduler table. 
diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index 5814aac7e2e..4a30f6d6a6d 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -23,10 +23,13 @@ spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = " spacetimedb-commitlog = { workspace = true, features = ["test"] } spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.2.0", features = ["test"] } spacetimedb-lib.workspace = true +spacetimedb-paths.workspace = true spacetimedb-primitives.workspace = true spacetimedb-runtime = { workspace = true, features = ["simulation"] } spacetimedb-sats.workspace = true spacetimedb-schema = { workspace = true, features = ["test"] } +spacetimedb-snapshot.workspace = true spacetimedb-table.workspace = true +tempfile.workspace = true tracing.workspace = true tracing-subscriber.workspace = true diff --git a/crates/dst/src/client.rs b/crates/dst/src/client.rs index 6d4eec570f1..84b215a7198 100644 --- a/crates/dst/src/client.rs +++ b/crates/dst/src/client.rs @@ -19,18 +19,6 @@ impl ClientId { pub const fn new(raw: u32) -> Self { Self(raw) } - - pub const fn from_index(index: usize) -> Self { - Self(index as u32) - } - - pub const fn as_u32(self) -> u32 { - self.0 - } - - pub const fn as_index(self) -> usize { - self.0 as usize - } } impl fmt::Display for ClientId { @@ -80,35 +68,3 @@ impl fmt::Display for SessionId { write!(f, "{}.session{}", self.client, self.generation) } } - -/// Logical server endpoint used by future client/network/replication workloads. -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct EndpointId(u32); - -impl EndpointId { - pub const ZERO: Self = Self(0); - - pub const fn new(raw: u32) -> Self { - Self(raw) - } - - pub const fn as_u32(self) -> u32 { - self.0 - } -} - -/// Logical node identifier for future replication and multi-node targets. 
-#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct NodeId(u32); - -impl NodeId { - pub const ZERO: Self = Self(0); - - pub const fn new(raw: u32) -> Self { - Self(raw) - } - - pub const fn as_u32(self) -> u32 { - self.0 - } -} diff --git a/crates/dst/src/properties.rs b/crates/dst/src/properties.rs index 136c96ac550..11d652fcaec 100644 --- a/crates/dst/src/properties.rs +++ b/crates/dst/src/properties.rs @@ -17,6 +17,7 @@ //! //! - Safety properties: `NotCrash`, `ErrorMatchesOracle`, //! `NoMutationMatchesModel`, `DurableReplayMatchesModel`, +//! `SnapshotCaptureMaintainsPrefix`, `SnapshotRestoreWithinDurablePrefix`, //! `BankingTablesMatch`, and `DynamicMigrationAutoInc`. //! - Model/oracle properties: `PointLookupMatchesModel`, //! `PredicateCountMatchesModel`, `RangeScanMatchesModel`, @@ -38,7 +39,7 @@ use crate::{ client::SessionId, schema::{SchemaPlan, SimRow}, workload::{ - commitlog_ops::DurableReplaySummary, + commitlog_ops::{DurableReplaySummary, SnapshotObservation}, table_ops::{TableErrorKind, TableWorkloadInteraction, TableWorkloadOutcome}, }, }; @@ -85,6 +86,10 @@ pub(crate) enum PropertyKind { DynamicMigrationAutoInc, /// Safety: durable replay state equals the oracle committed model. DurableReplayMatchesModel, + /// Safety: failed snapshot capture does not publish a newer usable snapshot. + SnapshotCaptureMaintainsPrefix, + /// Safety: restored snapshots are within the durable prefix. + SnapshotRestoreWithinDurablePrefix, /// Safety: observed errors match the model-predicted error class. ErrorMatchesOracle, /// Safety: model-predicted no-op interactions do not mutate visible state. 
@@ -165,6 +170,7 @@ pub(crate) enum CommitlogObservation { Applied, Skipped, DynamicMigrationProbe(DynamicMigrationProbe), + Snapshot(SnapshotObservation), DurableReplay(DurableReplaySummary), } @@ -227,6 +233,7 @@ enum PropertyEvent<'a> { }, CommitOrRollback, DynamicMigrationProbe(&'a DynamicMigrationProbe), + SnapshotCapture(&'a SnapshotObservation), DurableReplay(&'a DurableReplaySummary), TableWorkloadFinished(&'a TableWorkloadOutcome), } diff --git a/crates/dst/src/properties/rules.rs b/crates/dst/src/properties/rules.rs index 95acd563ad8..cb3f5bfc5d9 100644 --- a/crates/dst/src/properties/rules.rs +++ b/crates/dst/src/properties/rules.rs @@ -5,7 +5,10 @@ use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; use crate::{ client::SessionId, schema::{SchemaPlan, SimRow}, - workload::table_ops::{TableOperation, TableScenario}, + workload::{ + commitlog_ops::SnapshotCaptureStatus, + table_ops::{TableOperation, TableScenario}, + }, }; use super::{PropertyContext, PropertyEvent, PropertyKind, TableMutation, TableObservation, TargetPropertyAccess}; @@ -29,6 +32,8 @@ pub(super) fn rule_for_kind(kind: PropertyKind) -> Box { PropertyKind::BankingTablesMatch => Box::::default(), PropertyKind::DynamicMigrationAutoInc => Box::::default(), PropertyKind::DurableReplayMatchesModel => Box::::default(), + PropertyKind::SnapshotCaptureMaintainsPrefix => Box::::default(), + PropertyKind::SnapshotRestoreWithinDurablePrefix => Box::::default(), PropertyKind::ErrorMatchesOracle => Box::::default(), PropertyKind::NoMutationMatchesModel => Box::::default(), PropertyKind::PointLookupMatchesModel => Box::::default(), @@ -347,8 +352,93 @@ impl PropertyRule for DurableReplayMatchesModelRule { let expected_rows = ctx.models.table().committed_rows(); if replay.base_rows != expected_rows { return Err(format!( - "[DurableReplayMatchesModel] replayed durable state mismatch at offset {:?}: expected={expected_rows:?} actual={:?}", - replay.durable_offset, replay.base_rows + 
"[DurableReplayMatchesModel] replayed durable state mismatch at durable_offset {:?}, restored_snapshot {:?}: expected={expected_rows:?} actual={:?}", + replay.durable_offset, replay.restored_snapshot_offset, replay.base_rows + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct SnapshotCaptureMaintainsPrefixRule; + +impl PropertyRule for SnapshotCaptureMaintainsPrefixRule { + fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::SnapshotCapture(snapshot) = event else { + return Ok(()); + }; + + match snapshot.status { + SnapshotCaptureStatus::Captured { offset } => { + if snapshot.latest_after != Some(offset) { + return Err(format!( + "[SnapshotCaptureMaintainsPrefix] captured offset {offset}, but latest snapshot is {:?}: {snapshot:?}", + snapshot.latest_after + )); + } + let durable = snapshot.durable_offset.ok_or_else(|| { + format!( + "[SnapshotCaptureMaintainsPrefix] captured snapshot {offset} without a durable offset: {snapshot:?}" + ) + })?; + if offset > durable { + return Err(format!( + "[SnapshotCaptureMaintainsPrefix] captured snapshot {offset} beyond durable offset {durable}: {snapshot:?}" + )); + } + } + SnapshotCaptureStatus::SkippedInjectedFault => { + if snapshot.latest_after > snapshot.latest_before { + return Err(format!( + "[SnapshotCaptureMaintainsPrefix] injected snapshot fault published newer snapshot: before={:?}, after={:?}", + snapshot.latest_before, snapshot.latest_after + )); + } + } + SnapshotCaptureStatus::SkippedOpenTransaction | SnapshotCaptureStatus::SkippedNoSnapshotCreated => { + if snapshot.latest_after != snapshot.latest_before { + return Err(format!( + "[SnapshotCaptureMaintainsPrefix] skipped snapshot changed latest snapshot: before={:?}, after={:?}, status={:?}", + snapshot.latest_before, snapshot.latest_after, snapshot.status + )); + } + } + } + Ok(()) + } +} + +#[derive(Default)] +struct SnapshotRestoreWithinDurablePrefixRule; + +impl PropertyRule for 
SnapshotRestoreWithinDurablePrefixRule { + fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::DurableReplay(replay) = event else { + return Ok(()); + }; + let Some(snapshot_offset) = replay.restored_snapshot_offset else { + return Ok(()); + }; + let durable_offset = replay.durable_offset.ok_or_else(|| { + format!( + "[SnapshotRestoreWithinDurablePrefix] restored snapshot {snapshot_offset} without durable offset: {replay:?}" + ) + })?; + if snapshot_offset > durable_offset { + return Err(format!( + "[SnapshotRestoreWithinDurablePrefix] restored snapshot {snapshot_offset} beyond durable offset {durable_offset}: {replay:?}" + )); + } + if replay.latest_snapshot_offset == Some(snapshot_offset) { + return Ok(()); + } + if let Some(latest) = replay.latest_snapshot_offset + && latest <= durable_offset + && latest > snapshot_offset + { + return Err(format!( + "[SnapshotRestoreWithinDurablePrefix] restored snapshot {snapshot_offset}, but newer usable snapshot {latest} exists within durable offset {durable_offset}: {replay:?}" )); } Ok(()) diff --git a/crates/dst/src/properties/runtime.rs b/crates/dst/src/properties/runtime.rs index d42dce8e467..52e0140da37 100644 --- a/crates/dst/src/properties/runtime.rs +++ b/crates/dst/src/properties/runtime.rs @@ -7,7 +7,7 @@ use crate::{ core::{StreamingProperties, TargetEngine}, schema::{SchemaPlan, SimRow}, workload::{ - commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary}, + commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary, SnapshotObservation}, table_ops::{ PredictedOutcome, TableErrorKind, TableOracle, TableScenario, TableWorkloadInteraction, TableWorkloadOutcome, @@ -119,72 +119,63 @@ impl PropertyRuntime { runtime } - pub fn on_table_interaction( - &mut self, - access: &dyn TargetPropertyAccess, - interaction: &TableWorkloadInteraction, - ) -> Result<(), String> { - 
self.models.apply(interaction); + fn observe_event(&mut self, access: &dyn TargetPropertyAccess, event: PropertyEvent<'_>) -> Result<(), String> { let ctx = PropertyContext { access, models: &self.models, }; for entry in &mut self.rules { - entry.rule.observe(&ctx, PropertyEvent::TableInteractionApplied)?; + entry.rule.observe(&ctx, event.clone())?; } Ok(()) } - pub fn on_mutations( + fn on_table_interaction( + &mut self, + access: &dyn TargetPropertyAccess, + interaction: &TableWorkloadInteraction, + ) -> Result<(), String> { + self.models.apply(interaction); + self.observe_event(access, PropertyEvent::TableInteractionApplied) + } + + fn on_mutations( &mut self, access: &dyn TargetPropertyAccess, conn: SessionId, mutations: &[TableMutation], in_tx: bool, ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for mutation in mutations { match mutation { TableMutation::Inserted { table, requested: _, returned, - } => { - for entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::RowInserted { - conn, - table: *table, - returned, - in_tx, - }, - )?; - } - } - TableMutation::Deleted { table, row } => { - for entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::RowDeleted { - conn, - table: *table, - row, - in_tx, - }, - )?; - } - } + } => self.observe_event( + access, + PropertyEvent::RowInserted { + conn, + table: *table, + returned, + in_tx, + }, + )?, + TableMutation::Deleted { table, row } => self.observe_event( + access, + PropertyEvent::RowDeleted { + conn, + table: *table, + row, + in_tx, + }, + )?, } } Ok(()) } - pub fn on_observed_error( + fn on_observed_error( &mut self, access: &dyn TargetPropertyAccess, observed: TableErrorKind, @@ -192,49 +183,35 @@ impl PropertyRuntime { subject: Option<(SessionId, usize)>, interaction: &TableWorkloadInteraction, ) -> Result<(), String> { - let ctx = PropertyContext { + self.observe_event( access, - models: &self.models, - }; - for entry in 
&mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::ObservedError { - observed, - predicted, - subject, - interaction, - }, - )?; - } - Ok(()) + PropertyEvent::ObservedError { + observed, + predicted, + subject, + interaction, + }, + ) } - pub fn on_no_mutation( + fn on_no_mutation( &mut self, access: &dyn TargetPropertyAccess, subject: Option<(SessionId, usize)>, interaction: &TableWorkloadInteraction, observation: &TableObservation, ) -> Result<(), String> { - let ctx = PropertyContext { + self.observe_event( access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::NoMutation { - subject, - interaction, - observation, - }, - )?; - } - Ok(()) + PropertyEvent::NoMutation { + subject, + interaction, + observation, + }, + ) } - pub fn on_point_lookup( + fn on_point_lookup( &mut self, access: &dyn TargetPropertyAccess, conn: SessionId, @@ -242,25 +219,18 @@ impl PropertyRuntime { id: u64, actual: &Option, ) -> Result<(), String> { - let ctx = PropertyContext { + self.observe_event( access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::PointLookup { - conn, - table, - id, - actual, - }, - )?; - } - Ok(()) + PropertyEvent::PointLookup { + conn, + table, + id, + actual, + }, + ) } - pub fn on_predicate_count( + fn on_predicate_count( &mut self, access: &dyn TargetPropertyAccess, conn: SessionId, @@ -269,27 +239,20 @@ impl PropertyRuntime { value: &AlgebraicValue, actual: usize, ) -> Result<(), String> { - let ctx = PropertyContext { + self.observe_event( access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::PredicateCount { - conn, - table, - col, - value, - actual, - }, - )?; - } - Ok(()) + PropertyEvent::PredicateCount { + conn, + table, + col, + value, + actual, + }, + ) } #[allow(clippy::too_many_arguments)] - pub fn on_range_scan( + fn on_range_scan( &mut self, access: 
&dyn TargetPropertyAccess, conn: SessionId, @@ -299,101 +262,63 @@ impl PropertyRuntime { upper: &Bound, actual: &[SimRow], ) -> Result<(), String> { - let ctx = PropertyContext { + self.observe_event( access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe( - &ctx, - PropertyEvent::RangeScan { - conn, - table, - cols, - lower, - upper, - actual, - }, - )?; - } - Ok(()) + PropertyEvent::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + }, + ) } - pub fn on_full_scan( + fn on_full_scan( &mut self, access: &dyn TargetPropertyAccess, conn: SessionId, table: usize, actual: &[SimRow], ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry - .rule - .observe(&ctx, PropertyEvent::FullScan { conn, table, actual })?; - } - Ok(()) + self.observe_event(access, PropertyEvent::FullScan { conn, table, actual }) } - pub fn on_commit_or_rollback(&mut self, access: &dyn TargetPropertyAccess) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe(&ctx, PropertyEvent::CommitOrRollback)?; - } - Ok(()) + fn on_commit_or_rollback(&mut self, access: &dyn TargetPropertyAccess) -> Result<(), String> { + self.observe_event(access, PropertyEvent::CommitOrRollback) } - pub fn on_dynamic_migration_probe( + fn on_dynamic_migration_probe( &mut self, access: &dyn TargetPropertyAccess, probe: &DynamicMigrationProbe, ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe(&ctx, PropertyEvent::DynamicMigrationProbe(probe))?; - } - Ok(()) + self.observe_event(access, PropertyEvent::DynamicMigrationProbe(probe)) } - pub fn on_durable_replay( + fn on_snapshot_capture( + &mut self, + access: &dyn TargetPropertyAccess, + snapshot: &SnapshotObservation, + ) -> Result<(), String> { + 
self.observe_event(access, PropertyEvent::SnapshotCapture(snapshot)) + } + + fn on_durable_replay( &mut self, access: &dyn TargetPropertyAccess, replay: &DurableReplaySummary, ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry.rule.observe(&ctx, PropertyEvent::DurableReplay(replay))?; - } - Ok(()) + self.observe_event(access, PropertyEvent::DurableReplay(replay)) } - pub fn on_table_workload_finish( + fn on_table_workload_finish( &mut self, access: &dyn TargetPropertyAccess, outcome: &TableWorkloadOutcome, ) -> Result<(), String> { - let ctx = PropertyContext { - access, - models: &self.models, - }; - for entry in &mut self.rules { - entry - .rule - .observe(&ctx, PropertyEvent::TableWorkloadFinished(outcome))?; - } - Ok(()) + self.observe_event(access, PropertyEvent::TableWorkloadFinished(outcome)) } fn observe_table_observation( @@ -487,6 +412,9 @@ where self.observe_table_observation(engine, table_interaction, table_observation) } (_, CommitlogObservation::DynamicMigrationProbe(probe)) => self.on_dynamic_migration_probe(engine, probe), + (CommitlogInteraction::TakeSnapshot, CommitlogObservation::Snapshot(snapshot)) => { + self.on_snapshot_capture(engine, snapshot) + } (_, CommitlogObservation::DurableReplay(replay)) => self.on_durable_replay(engine, replay), (_, CommitlogObservation::Applied | CommitlogObservation::Skipped) => Ok(()), (other, observation) => Err(format!( @@ -523,6 +451,8 @@ impl Default for PropertyRuntime { PropertyKind::BankingTablesMatch, PropertyKind::DynamicMigrationAutoInc, PropertyKind::DurableReplayMatchesModel, + PropertyKind::SnapshotCaptureMaintainsPrefix, + PropertyKind::SnapshotRestoreWithinDurablePrefix, PropertyKind::ErrorMatchesOracle, PropertyKind::NoMutationMatchesModel, PropertyKind::PointLookupMatchesModel, diff --git a/crates/dst/src/sim/commitlog.rs b/crates/dst/src/sim/commitlog.rs index 0cefde7ede9..a7d98bcc852 100644 --- 
a/crates/dst/src/sim/commitlog.rs +++ b/crates/dst/src/sim/commitlog.rs @@ -3,11 +3,6 @@ use std::{ fmt, io::{self, BufRead, Read, Seek, Write}, - sync::{ - atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, - Arc, - }, - time::Duration, }; use spacetimedb_commitlog::{ @@ -15,115 +10,20 @@ use spacetimedb_commitlog::{ segment::FileLike, }; -use crate::{config::CommitlogFaultProfile, seed::DstSeed, sim}; +use crate::{ + seed::DstSeed, + sim::storage_faults::{ + is_injected_fault_text, ShortIoKind, StorageFaultConfig, StorageFaultController, StorageFaultDomain, + StorageFaultKind, StorageFaultSummary, + }, +}; -const INJECTED_DISK_ERROR_PREFIX: &str = "dst injected disk "; +pub(crate) type CommitlogFaultConfig = StorageFaultConfig; +pub(crate) type CommitlogFaultSummary = StorageFaultSummary; /// Returns true if `text` contains an error created by this fault layer. pub(crate) fn is_injected_disk_error_text(text: &str) -> bool { - text.contains(INJECTED_DISK_ERROR_PREFIX) -} - -/// Configurable fault profile for a DST-only commitlog repository wrapper. 
-#[derive(Clone, Copy, Debug)] -pub(crate) struct CommitlogFaultConfig { - profile: CommitlogFaultProfile, - enabled: bool, - latency_prob: f64, - long_latency_prob: f64, - short_io_prob: f64, - read_error_prob: f64, - write_error_prob: f64, - flush_error_prob: f64, - fsync_error_prob: f64, - open_error_prob: f64, - metadata_error_prob: f64, - max_short_io_divisor: usize, -} - -impl CommitlogFaultConfig { - pub(crate) fn for_profile(profile: CommitlogFaultProfile) -> Self { - match profile { - CommitlogFaultProfile::Off => Self { - profile, - enabled: false, - latency_prob: 0.0, - long_latency_prob: 0.0, - short_io_prob: 0.0, - read_error_prob: 0.0, - write_error_prob: 0.0, - flush_error_prob: 0.0, - fsync_error_prob: 0.0, - open_error_prob: 0.0, - metadata_error_prob: 0.0, - max_short_io_divisor: 2, - }, - CommitlogFaultProfile::Light => Self { - profile, - enabled: true, - latency_prob: 0.20, - long_latency_prob: 0.04, - short_io_prob: 0.03, - read_error_prob: 0.0, - write_error_prob: 0.0, - flush_error_prob: 0.0, - fsync_error_prob: 0.0, - open_error_prob: 0.0, - metadata_error_prob: 0.0, - max_short_io_divisor: 2, - }, - CommitlogFaultProfile::Default => Self { - profile, - enabled: true, - latency_prob: 0.35, - long_latency_prob: 0.08, - short_io_prob: 0.08, - read_error_prob: 0.0, - write_error_prob: 0.0, - flush_error_prob: 0.0, - fsync_error_prob: 0.0, - open_error_prob: 0.0, - metadata_error_prob: 0.0, - max_short_io_divisor: 2, - }, - CommitlogFaultProfile::Aggressive => Self { - profile, - enabled: true, - latency_prob: 0.65, - long_latency_prob: 0.18, - short_io_prob: 0.20, - // The current local durability actor does not recover from I/O errors, - // so profile-driven runs stay with latency and short I/O. The counters - // and hooks stay here for targeted tests once the target can classify - // those failures instead of treating them as harness errors. 
- read_error_prob: 0.0, - write_error_prob: 0.0, - flush_error_prob: 0.0, - fsync_error_prob: 0.0, - open_error_prob: 0.0, - metadata_error_prob: 0.0, - max_short_io_divisor: 4, - }, - } - } - - pub(crate) fn enabled(&self) -> bool { - self.enabled - } -} - -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub(crate) struct CommitlogFaultSummary { - pub(crate) profile: CommitlogFaultProfile, - pub(crate) latency: usize, - pub(crate) short_read: usize, - pub(crate) short_write: usize, - pub(crate) read_error: usize, - pub(crate) write_error: usize, - pub(crate) flush_error: usize, - pub(crate) fsync_error: usize, - pub(crate) open_error: usize, - pub(crate) metadata_error: usize, + is_injected_fault_text(StorageFaultDomain::Disk, text) } /// DST-only repo wrapper that makes the in-memory commitlog backend behave less like RAM. @@ -135,14 +35,14 @@ pub(crate) struct CommitlogFaultSummary { #[derive(Clone, Debug)] pub(crate) struct FaultableRepo { inner: R, - faults: FaultController, + faults: StorageFaultController, } impl FaultableRepo { pub(crate) fn new(inner: R, config: CommitlogFaultConfig, seed: DstSeed) -> Self { Self { inner, - faults: FaultController::new(config, seed), + faults: StorageFaultController::new(config, StorageFaultDomain::Disk, seed), } } @@ -161,7 +61,7 @@ impl FaultableRepo { impl fmt::Display for FaultableRepo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}+faultable({})", self.inner, self.faults.config.profile) + write!(f, "{}+faultable({})", self.inner, self.faults.summary().profile) } } @@ -170,24 +70,24 @@ impl Repo for FaultableRepo { type SegmentReader = FaultableReader; fn create_segment(&self, offset: u64) -> io::Result { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Open)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Open)?; self.inner .create_segment(offset) .map(|inner| FaultableSegment::new(inner, self.faults.clone())) } fn 
open_segment_reader(&self, offset: u64) -> io::Result { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Open)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Open)?; self.inner .open_segment_reader(offset) .map(|inner| FaultableReader::new(inner, self.faults.clone())) } fn open_segment_writer(&self, offset: u64) -> io::Result { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Open)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Open)?; self.inner .open_segment_writer(offset) .map(|inner| FaultableSegment::new(inner, self.faults.clone())) @@ -198,38 +98,38 @@ impl Repo for FaultableRepo { } fn remove_segment(&self, offset: u64) -> io::Result<()> { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Metadata)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.remove_segment(offset) } fn compress_segment(&self, offset: u64) -> io::Result<()> { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Metadata)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.compress_segment(offset) } fn existing_offsets(&self) -> io::Result> { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Metadata)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.existing_offsets() } fn create_offset_index(&self, offset: TxOffset, cap: u64) -> io::Result { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Metadata)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.create_offset_index(offset, cap) } fn remove_offset_index(&self, offset: TxOffset) -> io::Result<()> { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Metadata)?; + self.faults.maybe_latency(); + 
self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.remove_offset_index(offset) } fn get_offset_index(&self, offset: TxOffset) -> io::Result { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Metadata)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.get_offset_index(offset) } } @@ -238,19 +138,19 @@ impl RepoWithoutLockFile for FaultableRepo {} pub(crate) struct FaultableSegment { inner: S, - faults: FaultController, + faults: StorageFaultController, } impl FaultableSegment { - fn new(inner: S, faults: FaultController) -> Self { + fn new(inner: S, faults: StorageFaultController) -> Self { Self { inner, faults } } } impl Read for FaultableSegment { fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Read)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Read)?; let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); self.inner.read(&mut buf[..len]) } @@ -258,63 +158,63 @@ impl Read for FaultableSegment { impl Write for FaultableSegment { fn write(&mut self, buf: &[u8]) -> io::Result { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Write)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Write)?; let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Write); self.inner.write(&buf[..len]) } fn flush(&mut self) -> io::Result<()> { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Flush)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Flush)?; self.inner.flush() } } impl Seek for FaultableSegment { fn seek(&mut self, pos: io::SeekFrom) -> io::Result { - self.faults.maybe_disk_latency(); + self.faults.maybe_latency(); self.inner.seek(pos) } } impl SegmentLen for FaultableSegment { fn segment_len(&mut self) -> io::Result { - self.faults.maybe_disk_latency(); - 
self.faults.maybe_error(FaultKind::Metadata)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.segment_len() } } impl FileLike for FaultableSegment { fn fsync(&mut self) -> io::Result<()> { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Fsync)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Fsync)?; self.inner.fsync() } fn ftruncate(&mut self, tx_offset: u64, size: u64) -> io::Result<()> { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Metadata)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.ftruncate(tx_offset, size) } } pub(crate) struct FaultableReader { inner: S, - faults: FaultController, + faults: StorageFaultController, } impl FaultableReader { - fn new(inner: S, faults: FaultController) -> Self { + fn new(inner: S, faults: StorageFaultController) -> Self { Self { inner, faults } } } impl Read for FaultableReader { fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Read)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Read)?; let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); self.inner.read(&mut buf[..len]) } @@ -322,8 +222,8 @@ impl Read for FaultableReader { impl BufRead for FaultableReader { fn fill_buf(&mut self) -> io::Result<&[u8]> { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Read)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Read)?; let buf = self.inner.fill_buf()?; let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); Ok(&buf[..len]) @@ -336,15 +236,15 @@ impl BufRead for FaultableReader { impl Seek for FaultableReader { fn seek(&mut self, pos: io::SeekFrom) -> io::Result { - self.faults.maybe_disk_latency(); + self.faults.maybe_latency(); self.inner.seek(pos) } } impl 
SegmentLen for FaultableReader { fn segment_len(&mut self) -> io::Result { - self.faults.maybe_disk_latency(); - self.faults.maybe_error(FaultKind::Metadata)?; + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.segment_len() } } @@ -355,192 +255,12 @@ impl SegmentReader for FaultableReader { } } -#[derive(Clone, Debug)] -struct FaultController { - config: CommitlogFaultConfig, - counters: Arc, - decisions: Arc, - time: Option, - armed: Arc, - suspended: Arc, -} - -impl FaultController { - fn new(config: CommitlogFaultConfig, seed: DstSeed) -> Self { - Self { - config, - counters: Arc::default(), - decisions: Arc::new(sim::decision_source(seed)), - time: sim::time::try_current_handle(), - armed: Arc::new(AtomicBool::new(false)), - suspended: Arc::default(), - } - } - - fn enable(&self) { - self.armed.store(true, Ordering::Relaxed); - } - - fn active(&self) -> bool { - self.config.enabled() && self.armed.load(Ordering::Relaxed) && self.suspended.load(Ordering::Relaxed) == 0 - } - - fn with_suspended(&self, f: impl FnOnce() -> T) -> T { - self.suspended.fetch_add(1, Ordering::Relaxed); - let _guard = SuspendFaultsGuard { - suspended: self.suspended.clone(), - }; - f() - } - - fn maybe_disk_latency(&self) { - if self.sample(self.config.latency_prob) { - self.counters.latency.fetch_add(1, Ordering::Relaxed); - let latency = if self.sample(self.config.long_latency_prob) { - Duration::from_millis(25) - } else { - Duration::from_millis(1) - }; - if let Some(time) = &self.time { - time.advance(latency); - } else { - sim::advance_time(latency); - } - } - } - - fn maybe_error(&self, kind: FaultKind) -> io::Result<()> { - if self.sample(kind.probability(&self.config)) { - kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed); - return Err(io::Error::other(kind.message())); - } - Ok(()) - } - - fn maybe_short_len(&self, len: usize, kind: ShortIoKind) -> usize { - if len <= 1 { - return len; - } - if 
!self.sample(self.config.short_io_prob) { - return len; - } - - kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed); - let divisor = self.config.max_short_io_divisor.max(2); - (len / divisor).max(1) - } - - fn sample(&self, probability: f64) -> bool { - if !self.active() || probability <= 0.0 { - return false; - } - - self.decisions.sample_probability(probability) - } - - fn summary(&self) -> CommitlogFaultSummary { - CommitlogFaultSummary { - profile: self.config.profile, - latency: self.counters.latency.load(Ordering::Relaxed) as usize, - short_read: self.counters.short_read.load(Ordering::Relaxed) as usize, - short_write: self.counters.short_write.load(Ordering::Relaxed) as usize, - read_error: self.counters.read_error.load(Ordering::Relaxed) as usize, - write_error: self.counters.write_error.load(Ordering::Relaxed) as usize, - flush_error: self.counters.flush_error.load(Ordering::Relaxed) as usize, - fsync_error: self.counters.fsync_error.load(Ordering::Relaxed) as usize, - open_error: self.counters.open_error.load(Ordering::Relaxed) as usize, - metadata_error: self.counters.metadata_error.load(Ordering::Relaxed) as usize, - } - } -} - -struct SuspendFaultsGuard { - suspended: Arc, -} - -impl Drop for SuspendFaultsGuard { - fn drop(&mut self) { - self.suspended.fetch_sub(1, Ordering::Relaxed); - } -} - -#[derive(Debug, Default)] -struct FaultCounters { - latency: AtomicU64, - short_read: AtomicU64, - short_write: AtomicU64, - read_error: AtomicU64, - write_error: AtomicU64, - flush_error: AtomicU64, - fsync_error: AtomicU64, - open_error: AtomicU64, - metadata_error: AtomicU64, -} - -#[derive(Clone, Copy)] -enum ShortIoKind { - Read, - Write, -} - -impl ShortIoKind { - fn counter(self, counters: &FaultCounters) -> &AtomicU64 { - match self { - Self::Read => &counters.short_read, - Self::Write => &counters.short_write, - } - } -} - -#[derive(Clone, Copy)] -enum FaultKind { - Read, - Write, - Flush, - Fsync, - Open, - Metadata, -} - -impl FaultKind { - 
fn probability(self, config: &CommitlogFaultConfig) -> f64 { - match self { - Self::Read => config.read_error_prob, - Self::Write => config.write_error_prob, - Self::Flush => config.flush_error_prob, - Self::Fsync => config.fsync_error_prob, - Self::Open => config.open_error_prob, - Self::Metadata => config.metadata_error_prob, - } - } - - fn counter(self, counters: &FaultCounters) -> &AtomicU64 { - match self { - Self::Read => &counters.read_error, - Self::Write => &counters.write_error, - Self::Flush => &counters.flush_error, - Self::Fsync => &counters.fsync_error, - Self::Open => &counters.open_error, - Self::Metadata => &counters.metadata_error, - } - } - - fn message(self) -> &'static str { - match self { - Self::Read => "dst injected disk read error", - Self::Write => "dst injected disk write error", - Self::Flush => "dst injected disk flush error", - Self::Fsync => "dst injected disk fsync error", - Self::Open => "dst injected disk open error", - Self::Metadata => "dst injected disk metadata error", - } - } -} - #[cfg(test)] mod tests { use std::io::{BufRead, Cursor}; + use crate::config::CommitlogFaultProfile; + use super::*; fn always_short_read_config() -> CommitlogFaultConfig { @@ -562,7 +282,7 @@ mod tests { #[test] fn buf_read_path_applies_short_read_faults() { - let faults = FaultController::new(always_short_read_config(), DstSeed(55)); + let faults = StorageFaultController::new(always_short_read_config(), StorageFaultDomain::Disk, DstSeed(55)); faults.enable(); let mut reader = FaultableReader::new(Cursor::new(vec![1, 2, 3, 4]), faults.clone()); diff --git a/crates/dst/src/sim/mod.rs b/crates/dst/src/sim/mod.rs index fce9b4bd663..73b2f156519 100644 --- a/crates/dst/src/sim/mod.rs +++ b/crates/dst/src/sim/mod.rs @@ -5,6 +5,8 @@ //! deterministic RNG instead of being driven by a package-level async runtime. 
pub(crate) mod commitlog; +pub(crate) mod snapshot; +pub(crate) mod storage_faults; pub mod time; use std::{future::Future, time::Duration}; diff --git a/crates/dst/src/sim/snapshot.rs b/crates/dst/src/sim/snapshot.rs new file mode 100644 index 00000000000..a09d66ac96a --- /dev/null +++ b/crates/dst/src/sim/snapshot.rs @@ -0,0 +1,194 @@ +//! Production snapshot storage with deterministic fault injection. +//! +//! This is intentionally a semantic snapshot seam, not a filesystem facade. +//! Targets can use it to model snapshot lifecycle behavior while still writing +//! and reading real `SnapshotRepository` data. + +use std::sync::Arc; + +use spacetimedb_core::db::relational_db::{open_snapshot_repo, RelationalDB}; +use spacetimedb_durability::TxOffset; +use spacetimedb_lib::Identity; +use spacetimedb_paths::{server::SnapshotsPath, FromPathUnchecked}; +use spacetimedb_snapshot::SnapshotRepository; +use tempfile::TempDir; + +use crate::{ + seed::DstSeed, + sim::storage_faults::{ + is_injected_fault_text, StorageFaultConfig, StorageFaultController, StorageFaultDomain, StorageFaultKind, + StorageFaultSummary, + }, +}; + +pub(crate) type SnapshotFaultConfig = StorageFaultConfig; + +/// Returns true if `text` contains an error created by this snapshot fault layer. +pub(crate) fn is_injected_snapshot_error_text(text: &str) -> bool { + is_injected_fault_text(StorageFaultDomain::Snapshot, text) +} + +pub(crate) struct SnapshotRestoreRepo { + pub(crate) repo: Option>, + pub(crate) restored_snapshot_offset: Option, + pub(crate) latest_snapshot_offset: Option, +} + +/// Real snapshot repository wrapped with deterministic operation-level faults. +/// +/// The bytes/pages are written and read by `spacetimedb-snapshot`; this wrapper +/// only decides whether a DST operation reaches that repository. That keeps +/// restore semantics aligned with production without requiring the Tokio-backed +/// `SnapshotWorker` inside the simulator. 
+/// +/// This is the intended boundary for the current DST target. It exercises +/// capture/restore behavior, retry classification, and replay correctness. It +/// does not model torn snapshot pages or byte-level corruption; those require a +/// deeper repository abstraction inside `spacetimedb-snapshot`. +pub(crate) struct BuggifiedSnapshotRepo { + _root: TempDir, + repo: Arc, + faults: StorageFaultController, +} + +impl BuggifiedSnapshotRepo { + pub(crate) fn new(config: SnapshotFaultConfig, seed: DstSeed) -> anyhow::Result { + let root = tempfile::Builder::new() + .prefix("spacetimedb-dst-snapshots-") + .tempdir()?; + let path = SnapshotsPath::from_path_unchecked(root.path()); + let repo = open_snapshot_repo(path, Identity::ZERO, 0) + .map_err(|err| anyhow::anyhow!("open DST snapshot repo failed: {err}"))?; + Ok(Self { + _root: root, + repo, + faults: StorageFaultController::new(config, StorageFaultDomain::Snapshot, seed), + }) + } + + pub(crate) fn enable_faults(&self) { + self.faults.enable(); + } + + pub(crate) fn fault_summary(&self) -> StorageFaultSummary { + self.faults.summary() + } + + pub(crate) fn with_faults_suspended(&self, f: impl FnOnce() -> T) -> T { + self.faults.with_suspended(f) + } + + pub(crate) fn latest_snapshot_unfaulted(&self) -> Result, String> { + self.with_faults_suspended(|| { + self.repo + .latest_snapshot() + .map_err(|err| format!("snapshot metadata read failed: {err}")) + }) + } + + pub(crate) fn capture_from(&self, db: &RelationalDB) -> Result, String> { + self.faults.maybe_latency(); + self.inject(StorageFaultKind::Open)?; + self.inject(StorageFaultKind::Metadata)?; + self.inject(StorageFaultKind::Write)?; + self.inject(StorageFaultKind::Fsync)?; + + let created = db + .take_snapshot(&self.repo) + .map_err(|err| format!("snapshot capture failed: {err}"))?; + if created.is_none() { + return Ok(None); + } + + self.repo + .latest_snapshot() + .map_err(|err| format!("snapshot metadata after capture failed: {err}")) + } + + 
pub(crate) fn repo_for_restore(&self, durable_offset: Option) -> Result { + let latest_snapshot_offset = self.latest_snapshot_unfaulted()?; + self.faults.maybe_latency(); + self.inject(StorageFaultKind::Metadata)?; + let Some(durable_offset) = durable_offset else { + return Ok(SnapshotRestoreRepo { + repo: None, + restored_snapshot_offset: None, + latest_snapshot_offset, + }); + }; + let restored_snapshot_offset = self + .repo + .latest_snapshot_older_than(durable_offset) + .map_err(|err| format!("snapshot metadata before restore failed: {err}"))?; + if restored_snapshot_offset.is_none() { + return Ok(SnapshotRestoreRepo { + repo: None, + restored_snapshot_offset, + latest_snapshot_offset, + }); + } + + self.inject(StorageFaultKind::Open)?; + self.inject(StorageFaultKind::Read)?; + Ok(SnapshotRestoreRepo { + repo: Some(self.repo.clone()), + restored_snapshot_offset, + latest_snapshot_offset, + }) + } + + fn inject(&self, kind: StorageFaultKind) -> Result<(), String> { + self.faults.maybe_error(kind).map_err(|err| err.to_string()) + } +} + +#[cfg(test)] +mod tests { + use crate::{config::CommitlogFaultProfile, seed::DstSeed}; + + use super::*; + + fn no_faults() -> SnapshotFaultConfig { + SnapshotFaultConfig::for_profile(CommitlogFaultProfile::Off) + } + + fn always_metadata_error() -> SnapshotFaultConfig { + SnapshotFaultConfig { + enabled: true, + metadata_error_prob: 1.0, + ..SnapshotFaultConfig::for_profile(CommitlogFaultProfile::Default) + } + } + + #[test] + fn repo_without_snapshots_is_not_used_for_restore() { + let repo = BuggifiedSnapshotRepo::new(no_faults(), DstSeed(41)).unwrap(); + + assert!(repo.repo_for_restore(Some(0)).unwrap().repo.is_none()); + } + + #[test] + fn injected_metadata_error_is_counted_and_recognizable() { + let repo = BuggifiedSnapshotRepo::new(always_metadata_error(), DstSeed(42)).unwrap(); + repo.enable_faults(); + + let err = match repo.repo_for_restore(Some(0)) { + Ok(_) => panic!("expected injected snapshot metadata error"), + 
Err(err) => err, + }; + + assert!(is_injected_snapshot_error_text(&err)); + assert_eq!(repo.fault_summary().metadata_error, 1); + } + + #[test] + fn suspended_faults_allow_restore_probe() { + let repo = BuggifiedSnapshotRepo::new(always_metadata_error(), DstSeed(43)).unwrap(); + repo.enable_faults(); + + let restore = repo.with_faults_suspended(|| repo.repo_for_restore(Some(0))); + + assert!(restore.unwrap().repo.is_none()); + assert_eq!(repo.fault_summary().metadata_error, 0); + } +} diff --git a/crates/dst/src/sim/storage_faults.rs b/crates/dst/src/sim/storage_faults.rs new file mode 100644 index 00000000000..2bc72fd3bea --- /dev/null +++ b/crates/dst/src/sim/storage_faults.rs @@ -0,0 +1,320 @@ +//! Shared storage fault-injection primitives for DST simulation helpers. + +use std::{ + io, + sync::{ + atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, + Arc, + }, + time::Duration, +}; + +use crate::{config::CommitlogFaultProfile, seed::DstSeed, sim}; + +const INJECTED_ERROR_PREFIX: &str = "dst injected "; + +pub(crate) fn is_injected_fault_text(domain: StorageFaultDomain, text: &str) -> bool { + text.contains(&format!("{INJECTED_ERROR_PREFIX}{} ", domain.label())) +} + +/// API-level storage fault profile for DST-only storage wrappers. 
+#[derive(Clone, Copy, Debug)] +pub(crate) struct StorageFaultConfig { + pub(crate) profile: CommitlogFaultProfile, + pub(crate) enabled: bool, + pub(crate) latency_prob: f64, + pub(crate) long_latency_prob: f64, + pub(crate) short_io_prob: f64, + pub(crate) read_error_prob: f64, + pub(crate) write_error_prob: f64, + pub(crate) flush_error_prob: f64, + pub(crate) fsync_error_prob: f64, + pub(crate) open_error_prob: f64, + pub(crate) metadata_error_prob: f64, + pub(crate) max_short_io_divisor: usize, +} + +impl StorageFaultConfig { + pub(crate) fn for_profile(profile: CommitlogFaultProfile) -> Self { + match profile { + CommitlogFaultProfile::Off => Self { + profile, + enabled: false, + latency_prob: 0.0, + long_latency_prob: 0.0, + short_io_prob: 0.0, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + }, + CommitlogFaultProfile::Light => Self { + profile, + enabled: true, + latency_prob: 0.20, + long_latency_prob: 0.04, + short_io_prob: 0.03, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + }, + CommitlogFaultProfile::Default => Self { + profile, + enabled: true, + latency_prob: 0.35, + long_latency_prob: 0.08, + short_io_prob: 0.08, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + }, + CommitlogFaultProfile::Aggressive => Self { + profile, + enabled: true, + latency_prob: 0.65, + long_latency_prob: 0.18, + short_io_prob: 0.20, + // Current profile-driven runs stay with latency and short I/O. + // Error hooks are available for targeted tests once targets can + // classify transient storage failures instead of treating them + // as harness errors. 
+ read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 4, + }, + } + } + + pub(crate) fn enabled(&self) -> bool { + self.enabled + } +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub(crate) struct StorageFaultSummary { + pub(crate) profile: CommitlogFaultProfile, + pub(crate) latency: usize, + pub(crate) short_read: usize, + pub(crate) short_write: usize, + pub(crate) read_error: usize, + pub(crate) write_error: usize, + pub(crate) flush_error: usize, + pub(crate) fsync_error: usize, + pub(crate) open_error: usize, + pub(crate) metadata_error: usize, +} + +#[derive(Clone, Copy, Debug)] +pub(crate) enum StorageFaultDomain { + Disk, + Snapshot, +} + +impl StorageFaultDomain { + fn label(self) -> &'static str { + match self { + Self::Disk => "disk", + Self::Snapshot => "snapshot", + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct StorageFaultController { + config: StorageFaultConfig, + domain: StorageFaultDomain, + counters: Arc, + decisions: Arc, + time: Option, + armed: Arc, + suspended: Arc, +} + +impl StorageFaultController { + pub(crate) fn new(config: StorageFaultConfig, domain: StorageFaultDomain, seed: DstSeed) -> Self { + Self { + config, + domain, + counters: Arc::default(), + decisions: Arc::new(sim::decision_source(seed)), + time: sim::time::try_current_handle(), + armed: Arc::new(AtomicBool::new(false)), + suspended: Arc::default(), + } + } + + pub(crate) fn enable(&self) { + self.armed.store(true, Ordering::Relaxed); + } + + pub(crate) fn with_suspended(&self, f: impl FnOnce() -> T) -> T { + self.suspended.fetch_add(1, Ordering::Relaxed); + let _guard = SuspendFaultsGuard { + suspended: self.suspended.clone(), + }; + f() + } + + pub(crate) fn maybe_latency(&self) { + if self.sample(self.config.latency_prob) { + self.counters.latency.fetch_add(1, Ordering::Relaxed); + let latency = if 
self.sample(self.config.long_latency_prob) { + Duration::from_millis(25) + } else { + Duration::from_millis(1) + }; + if let Some(time) = &self.time { + time.advance(latency); + } else { + sim::advance_time(latency); + } + } + } + + pub(crate) fn maybe_error(&self, kind: StorageFaultKind) -> io::Result<()> { + if self.sample(kind.probability(&self.config)) { + kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed); + return Err(io::Error::other(kind.message(self.domain))); + } + Ok(()) + } + + pub(crate) fn maybe_short_len(&self, len: usize, kind: ShortIoKind) -> usize { + if len <= 1 { + return len; + } + if !self.sample(self.config.short_io_prob) { + return len; + } + + kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed); + let divisor = self.config.max_short_io_divisor.max(2); + (len / divisor).max(1) + } + + pub(crate) fn summary(&self) -> StorageFaultSummary { + StorageFaultSummary { + profile: self.config.profile, + latency: self.counters.latency.load(Ordering::Relaxed) as usize, + short_read: self.counters.short_read.load(Ordering::Relaxed) as usize, + short_write: self.counters.short_write.load(Ordering::Relaxed) as usize, + read_error: self.counters.read_error.load(Ordering::Relaxed) as usize, + write_error: self.counters.write_error.load(Ordering::Relaxed) as usize, + flush_error: self.counters.flush_error.load(Ordering::Relaxed) as usize, + fsync_error: self.counters.fsync_error.load(Ordering::Relaxed) as usize, + open_error: self.counters.open_error.load(Ordering::Relaxed) as usize, + metadata_error: self.counters.metadata_error.load(Ordering::Relaxed) as usize, + } + } + + fn active(&self) -> bool { + self.config.enabled() && self.armed.load(Ordering::Relaxed) && self.suspended.load(Ordering::Relaxed) == 0 + } + + fn sample(&self, probability: f64) -> bool { + if !self.active() || probability <= 0.0 { + return false; + } + + self.decisions.sample_probability(probability) + } +} + +struct SuspendFaultsGuard { + suspended: Arc, +} + +impl 
Drop for SuspendFaultsGuard { + fn drop(&mut self) { + self.suspended.fetch_sub(1, Ordering::Relaxed); + } +} + +#[derive(Debug, Default)] +struct FaultCounters { + latency: AtomicU64, + short_read: AtomicU64, + short_write: AtomicU64, + read_error: AtomicU64, + write_error: AtomicU64, + flush_error: AtomicU64, + fsync_error: AtomicU64, + open_error: AtomicU64, + metadata_error: AtomicU64, +} + +#[derive(Clone, Copy)] +pub(crate) enum ShortIoKind { + Read, + Write, +} + +impl ShortIoKind { + fn counter(self, counters: &FaultCounters) -> &AtomicU64 { + match self { + Self::Read => &counters.short_read, + Self::Write => &counters.short_write, + } + } +} + +#[derive(Clone, Copy)] +pub(crate) enum StorageFaultKind { + Read, + Write, + Flush, + Fsync, + Open, + Metadata, +} + +impl StorageFaultKind { + fn probability(self, config: &StorageFaultConfig) -> f64 { + match self { + Self::Read => config.read_error_prob, + Self::Write => config.write_error_prob, + Self::Flush => config.flush_error_prob, + Self::Fsync => config.fsync_error_prob, + Self::Open => config.open_error_prob, + Self::Metadata => config.metadata_error_prob, + } + } + + fn counter(self, counters: &FaultCounters) -> &AtomicU64 { + match self { + Self::Read => &counters.read_error, + Self::Write => &counters.write_error, + Self::Flush => &counters.flush_error, + Self::Fsync => &counters.fsync_error, + Self::Open => &counters.open_error, + Self::Metadata => &counters.metadata_error, + } + } + + fn message(self, domain: StorageFaultDomain) -> String { + let action = match self { + Self::Read => "read", + Self::Write => "write", + Self::Flush => "flush", + Self::Fsync => "fsync", + Self::Open => "open", + Self::Metadata => "metadata", + }; + format!("{INJECTED_ERROR_PREFIX}{} {action} error", domain.label()) + } +} diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index 91c522fbd42..ec0ede37d93 100644 --- a/crates/dst/src/targets/descriptor.rs +++ 
b/crates/dst/src/targets/descriptor.rs @@ -50,8 +50,8 @@ fn format_relational_db_commitlog_outcome( "ok target={} seed={} steps={}\n", "\n", "schema: tables={} columns={} max_columns={} indexes={} extra_indexes={}\n", - "durability: durable_commits={} replay_tables={}\n", - "interactions: table={} creates={} drops={} migrates={} reopens={} reopen_skipped={} skipped={}\n", + "durability: durable_commits={} replay_tables={} restored_snapshot={:?} latest_snapshot={:?}\n", + "interactions: table={} creates={} drops={} migrates={} snapshots={} snapshot_created={} snapshot_skipped={} reopens={} reopen_skipped={} skipped={}\n", "table_ops:\n", " tx_control: begin={} commit={} rollback={} begin_read={} release_read={} begin_conflict={} write_conflict={}\n", " writes: insert={} delete={} exact_dup={} unique_conflict={} missing_delete={} batch_insert={} batch_delete={} reinsert={}\n", @@ -59,6 +59,7 @@ fn format_relational_db_commitlog_outcome( " reads: point_lookup={} predicate_count={} range_scan={} full_scan={}\n", "transactions: begin={} commit={} rollback={} auto_commit={} read_tx={}\n", "disk_faults: profile={} latency={} short_read={} short_write={} errors(read={} write={} flush={} fsync={} open={} metadata={})\n", + "snapshot_faults: profile={} latency={} errors(read={} write={} fsync={} open={} metadata={})\n", "runtime: known_tasks={} durability_actors={} alive_tasks={}" ), target, @@ -71,10 +72,15 @@ fn format_relational_db_commitlog_outcome( outcome.schema.extra_indexes, outcome.durable_commit_count, outcome.replay_table_count, + outcome.replay.restored_snapshot_offset, + outcome.replay.latest_snapshot_offset, outcome.interactions.table, outcome.interactions.create_dynamic_table, outcome.interactions.drop_dynamic_table, outcome.interactions.migrate_dynamic_table, + outcome.interactions.snapshot_requested, + outcome.interactions.snapshot_created, + outcome.interactions.snapshot_skipped, outcome.interactions.close_reopen_applied, 
outcome.interactions.close_reopen_skipped, outcome.interactions.skipped, @@ -114,6 +120,13 @@ fn format_relational_db_commitlog_outcome( outcome.disk_faults.fsync_error, outcome.disk_faults.open_error, outcome.disk_faults.metadata_error, + outcome.snapshot_faults.profile, + outcome.snapshot_faults.latency, + outcome.snapshot_faults.read_error, + outcome.snapshot_faults.write_error, + outcome.snapshot_faults.fsync_error, + outcome.snapshot_faults.open_error, + outcome.snapshot_faults.metadata_error, outcome.runtime.known_runtime_tasks_scheduled, outcome.runtime.durability_actors_started, alive_tasks diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 8277ba9edde..94d7a71f722 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -29,7 +29,7 @@ use tracing::{debug, info, trace}; use crate::{ client::SessionId, - config::{CommitlogFaultProfile, RunConfig}, + config::RunConfig, core::{self, TargetEngine}, properties::{ CommitlogObservation, DynamicMigrationProbe, PropertyRuntime, TableMutation, TableObservation, @@ -40,9 +40,13 @@ use crate::{ sim::{ self, commitlog::{is_injected_disk_error_text, CommitlogFaultConfig, CommitlogFaultSummary, FaultableRepo}, + snapshot::{is_injected_snapshot_error_text, BuggifiedSnapshotRepo, SnapshotFaultConfig}, }, workload::{ - commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DiskFaultSummary, DurableReplaySummary}, + commitlog_ops::{ + CommitlogInteraction, CommitlogWorkloadOutcome, DiskFaultSummary, DurableReplaySummary, + SnapshotCaptureStatus, SnapshotObservation, + }, commitlog_ops::{InteractionSummary, RuntimeSummary, SchemaSummary, TableOperationSummary, TransactionSummary}, table_ops::{ ConnectionWriteState, TableErrorKind, TableInteractionCase, TableOperation, TableScenario, TableScenarioId, @@ -81,6 +85,26 @@ fn build( RelationalDbCommitlogSource, RelationalDbEngine, 
RelationalDbCommitlogProperties, +)> { + build_with_fault_configs( + seed, + scenario, + config, + CommitlogFaultConfig::for_profile(config.commitlog_fault_profile), + SnapshotFaultConfig::for_profile(config.commitlog_fault_profile), + ) +} + +fn build_with_fault_configs( + seed: DstSeed, + scenario: TableScenarioId, + config: &RunConfig, + commitlog_fault_config: CommitlogFaultConfig, + snapshot_fault_config: SnapshotFaultConfig, +) -> anyhow::Result<( + RelationalDbCommitlogSource, + RelationalDbEngine, + RelationalDbCommitlogProperties, )> { let mut connection_rng = seed.fork(121).rng(); let num_connections = connection_rng.index(3) + 1; @@ -93,7 +117,13 @@ fn build( num_connections, config.max_interactions_or_default(usize::MAX), ); - let engine = RelationalDbEngine::new(seed, &schema, num_connections, config.commitlog_fault_profile)?; + let engine = RelationalDbEngine::new_with_fault_configs( + seed, + &schema, + num_connections, + commitlog_fault_config, + snapshot_fault_config, + )?; let properties = PropertyRuntime::for_table_workload(scenario, schema.clone(), num_connections); Ok((generator, engine, properties)) } @@ -134,6 +164,7 @@ impl RunStats { CommitlogInteraction::CreateDynamicTable { .. } => self.interactions.create_dynamic_table += 1, CommitlogInteraction::DropDynamicTable { .. } => self.interactions.drop_dynamic_table += 1, CommitlogInteraction::MigrateDynamicTable { .. } => self.interactions.migrate_dynamic_table += 1, + CommitlogInteraction::TakeSnapshot => self.interactions.snapshot_requested += 1, CommitlogInteraction::CloseReopen => self.interactions.close_reopen_requested += 1, } } @@ -151,6 +182,16 @@ impl RunStats { _ => {} } } + if matches!(interaction, CommitlogInteraction::TakeSnapshot) { + match observation { + CommitlogObservation::Snapshot(SnapshotObservation { + status: SnapshotCaptureStatus::Captured { .. }, + .. 
+ }) => self.interactions.snapshot_created += 1, + CommitlogObservation::Snapshot(_) => self.interactions.snapshot_skipped += 1, + _ => {} + } + } } fn record_table_operation(&mut self, case: TableInteractionCase) { @@ -205,6 +246,13 @@ impl RunStats { } } +struct ReopenedRelationalDb { + durability: Arc, + db: RelationalDB, + restored_snapshot_offset: Option, + latest_snapshot_offset: Option, +} + /// Engine executing mixed table+lifecycle interactions while recording mocked durable history. struct RelationalDbEngine { db: Option, @@ -216,20 +264,24 @@ struct RelationalDbEngine { step: usize, last_requested_durable_offset: Option, last_observed_durable_offset: Option, + last_restored_snapshot_offset: Option, + latest_snapshot_offset: Option, durability: Arc, durability_opts: spacetimedb_durability::local::Options, commitlog_repo: StressCommitlogRepo, + snapshot_repo: StressSnapshotRepo, stats: RunStats, } impl RelationalDbEngine { - fn new( + fn new_with_fault_configs( seed: DstSeed, schema: &SchemaPlan, num_connections: usize, - fault_profile: CommitlogFaultProfile, + commitlog_fault_config: CommitlogFaultConfig, + snapshot_fault_config: SnapshotFaultConfig, ) -> anyhow::Result { - let bootstrap = bootstrap_relational_db(seed.fork(700), fault_profile)?; + let bootstrap = bootstrap_relational_db(seed.fork(700), commitlog_fault_config, snapshot_fault_config)?; let mut this = Self { db: Some(bootstrap.db), execution: ConnectionWriteState::new(num_connections), @@ -240,9 +292,12 @@ impl RelationalDbEngine { step: 0, last_requested_durable_offset: None, last_observed_durable_offset: None, + last_restored_snapshot_offset: None, + latest_snapshot_offset: None, durability: bootstrap.durability, durability_opts: bootstrap.durability_opts, commitlog_repo: bootstrap.commitlog_repo, + snapshot_repo: bootstrap.snapshot_repo, stats: RunStats { runtime: RuntimeStats::default(), ..Default::default() @@ -251,6 +306,7 @@ impl RelationalDbEngine { 
this.install_base_schema().map_err(anyhow::Error::msg)?; this.refresh_observed_durable_offset(true).map_err(anyhow::Error::msg)?; this.commitlog_repo.enable_faults(); + this.snapshot_repo.enable_faults(); Ok(this) } @@ -319,6 +375,7 @@ impl RelationalDbEngine { CommitlogInteraction::CreateDynamicTable { conn, slot } => self.create_dynamic_table(*conn, *slot), CommitlogInteraction::DropDynamicTable { conn, slot } => self.drop_dynamic_table(*conn, *slot), CommitlogInteraction::MigrateDynamicTable { conn, slot } => self.migrate_dynamic_table(*conn, *slot), + CommitlogInteraction::TakeSnapshot => self.take_snapshot().await, CommitlogInteraction::CloseReopen => self.close_and_reopen().await, }?; if !matches!(interaction, CommitlogInteraction::CloseReopen) { @@ -348,10 +405,12 @@ impl RelationalDbEngine { drop(old_db); info!("starting in-memory durability"); - let (durability, db) = self.reopen_from_history_with_fault_retry("close/reopen")?; + let reopened = self.reopen_from_history_with_fault_retry("close/reopen")?; - self.durability = durability; - self.db = Some(db); + self.durability = reopened.durability; + self.db = Some(reopened.db); + self.last_restored_snapshot_offset = reopened.restored_snapshot_offset; + self.latest_snapshot_offset = reopened.latest_snapshot_offset; self.rebuild_table_handles_after_reopen()?; self.last_observed_durable_offset = self.durability.durable_tx_offset().last_seen(); let replay = self.durable_replay_summary()?; @@ -363,28 +422,69 @@ impl RelationalDbEngine { Ok(CommitlogObservation::DurableReplay(replay)) } - fn reopen_from_history_with_fault_retry( - &self, - context: &'static str, - ) -> Result<(Arc, RelationalDB), String> { + async fn take_snapshot(&mut self) -> Result { + let latest_before = self.snapshot_repo.latest_snapshot_unfaulted()?; + if self.execution.active_writer.is_some() + || self.execution.tx_by_connection.iter().any(|tx| tx.is_some()) + || self.read_tx_by_connection.iter().any(|tx| tx.is_some()) + { + trace!("skip 
snapshot while transaction is open"); + return self.snapshot_observation(latest_before, SnapshotCaptureStatus::SkippedOpenTransaction); + } + + self.wait_for_requested_durability(true).await?; + match self.snapshot_repo.capture_from(self.db()?) { + Ok(Some(offset)) => { + debug!(offset, "captured DST snapshot"); + self.snapshot_observation(latest_before, SnapshotCaptureStatus::Captured { offset }) + } + Ok(None) => self.snapshot_observation(latest_before, SnapshotCaptureStatus::SkippedNoSnapshotCreated), + Err(err) if is_injected_snapshot_error_text(&err) => { + trace!(error = %err, "injected snapshot fault skipped snapshot capture"); + self.snapshot_observation(latest_before, SnapshotCaptureStatus::SkippedInjectedFault) + } + Err(err) => Err(err), + } + } + + fn snapshot_observation( + &mut self, + latest_before: Option, + status: SnapshotCaptureStatus, + ) -> Result { + let latest_after = self.snapshot_repo.latest_snapshot_unfaulted()?; + self.latest_snapshot_offset = latest_after; + Ok(CommitlogObservation::Snapshot(SnapshotObservation { + durable_offset: self.last_observed_durable_offset, + latest_before, + latest_after, + status, + })) + } + + fn reopen_from_history_with_fault_retry(&self, context: &'static str) -> Result { match self.reopen_from_history() { Ok(reopened) => Ok(reopened), - Err(err) if is_injected_disk_error_text(&err) => { - trace!(error = %err, "retrying {context} with injected disk faults suspended"); - self.commitlog_repo.with_faults_suspended(|| self.reopen_from_history()) + Err(err) if is_injected_disk_error_text(&err) || is_injected_snapshot_error_text(&err) => { + trace!(error = %err, "retrying {context} with injected storage faults suspended"); + self.commitlog_repo + .with_faults_suspended(|| self.snapshot_repo.with_faults_suspended(|| self.reopen_from_history())) } Err(err) => Err(err), } } - fn reopen_from_history(&self) -> Result<(Arc, RelationalDB), String> { + fn reopen_from_history(&self) -> Result { let durability = Arc::new( 
InMemoryCommitlogDurability::open_with_repo(self.commitlog_repo.clone(), self.durability_opts) .map_err(|err| format!("reopen in-memory durability failed: {err}"))?, ); + let durable_offset = durability.durable_tx_offset().last_seen(); + let snapshot_restore = self.snapshot_repo.repo_for_restore(durable_offset)?; let persistence = Persistence { durability: durability.clone(), disk_size: Arc::new(in_memory_size_on_disk), + snapshot_repo: snapshot_restore.repo, snapshots: None, runtime: spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), }; @@ -402,7 +502,12 @@ impl RelationalDbEngine { "unexpected connected clients after reopen: {connected_clients:?}" )); } - Ok((durability, db)) + Ok(ReopenedRelationalDb { + durability, + db, + restored_snapshot_offset: snapshot_restore.restored_snapshot_offset, + latest_snapshot_offset: snapshot_restore.latest_snapshot_offset, + }) } fn rebuild_table_handles_after_reopen(&mut self) -> Result<(), String> { @@ -608,7 +713,8 @@ impl RelationalDbEngine { if self.execution.tx_by_connection[conn.as_index()].is_some() { return Err(format!("connection {conn} already has open transaction")); } - if self.execution.active_writer.is_some() { + if let Some(owner) = self.execution.active_writer { + self.expect_write_lock_contended(conn, owner, "begin write transaction")?; return Ok(TableObservation::ObservedError(TableErrorKind::WriteConflict)); } self.execution.tx_by_connection[conn.as_index()] = Some( @@ -710,7 +816,8 @@ impl RelationalDbEngine { return result; } - if self.execution.active_writer.is_some() { + if let Some(owner) = self.execution.active_writer { + self.expect_write_lock_contended(conn, owner, "auto-commit write")?; return Ok(Err(TableErrorKind::WriteConflict)); } @@ -763,6 +870,7 @@ impl RelationalDbEngine { } if let Some(owner) = self.execution.active_writer { + self.expect_write_lock_contended(conn, owner, "auto-commit write")?; return Err(format!( "connection {conn} cannot auto-commit write while connection 
{owner} owns lock" )); @@ -793,6 +901,17 @@ impl RelationalDbEngine { Ok(value) } + fn expect_write_lock_contended(&self, contender: SessionId, owner: SessionId, action: &str) -> Result<(), String> { + let db = self.db()?; + if let Some(tx) = db.try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) { + let _ = db.rollback_mut_tx(tx); + return Err(format!( + "expected write lock contention for connection {contender} during {action} while connection {owner} owns lock, but datastore accepted a second writer" + )); + } + Ok(()) + } + fn try_insert_base_row( &self, tx: &mut RelMutTx, @@ -1221,6 +1340,8 @@ impl RelationalDbEngine { fn durable_replay_summary(&self) -> Result { Ok(DurableReplaySummary { durable_offset: self.last_observed_durable_offset, + restored_snapshot_offset: self.last_restored_snapshot_offset, + latest_snapshot_offset: self.latest_snapshot_offset, base_rows: self.collect_base_rows()?, dynamic_table_count: self.dynamic_tables.len(), }) @@ -1234,9 +1355,11 @@ impl RelationalDbEngine { old_db.shutdown().await; drop(old_db); - let (durability, db) = self.reopen_from_history_with_fault_retry("final replay check")?; - self.durability = durability; - self.db = Some(db); + let reopened = self.reopen_from_history_with_fault_retry("final replay check")?; + self.durability = reopened.durability; + self.db = Some(reopened.db); + self.last_restored_snapshot_offset = reopened.restored_snapshot_offset; + self.latest_snapshot_offset = reopened.latest_snapshot_offset; self.rebuild_table_handles_after_reopen()?; self.last_observed_durable_offset = self.durability.durable_tx_offset().last_seen(); self.durable_replay_summary() @@ -1262,6 +1385,7 @@ impl RelationalDbEngine { transactions: self.stats.transaction_summary(durable_commit_count), runtime: self.stats.runtime_summary(), disk_faults: disk_fault_summary(self.commitlog_repo.fault_summary()), + snapshot_faults: disk_fault_summary(self.snapshot_repo.fault_summary()), replay, table, }) @@ -1380,22 
+1504,28 @@ impl TargetEngine for RelationalDbEngine { } type StressCommitlogRepo = FaultableRepo; +type StressSnapshotRepo = BuggifiedSnapshotRepo; type InMemoryCommitlogDurability = DirectLocal; struct RelationalDbBootstrap { db: RelationalDB, commitlog_repo: StressCommitlogRepo, + snapshot_repo: StressSnapshotRepo, durability: Arc, durability_opts: spacetimedb_durability::local::Options, } fn bootstrap_relational_db( seed: DstSeed, - fault_profile: CommitlogFaultProfile, + commitlog_fault_config: CommitlogFaultConfig, + snapshot_fault_config: SnapshotFaultConfig, ) -> anyhow::Result { - let fault_config = CommitlogFaultConfig::for_profile(fault_profile); - - let commitlog_repo = FaultableRepo::new(MemoryCommitlogRepo::new(8 * 1024 * 1024), fault_config, seed.fork(702)); + let commitlog_repo = FaultableRepo::new( + MemoryCommitlogRepo::new(8 * 1024 * 1024), + commitlog_fault_config, + seed.fork(702), + ); + let snapshot_repo = BuggifiedSnapshotRepo::new(snapshot_fault_config, seed.fork(703))?; let durability_opts = commitlog_stress_options(seed.fork(701)); let durability = Arc::new( InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), durability_opts) @@ -1404,6 +1534,7 @@ fn bootstrap_relational_db( let persistence = Persistence { durability: durability.clone(), disk_size: Arc::new(in_memory_size_on_disk), + snapshot_repo: None, snapshots: None, runtime: spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), }; @@ -1422,6 +1553,7 @@ fn bootstrap_relational_db( Ok(RelationalDbBootstrap { db, commitlog_repo, + snapshot_repo, durability, durability_opts, }) @@ -1559,3 +1691,110 @@ fn dynamic_schema(name: &str, version: u32) -> TableSchema { None, ) } + +#[cfg(test)] +mod tests { + use crate::config::CommitlogFaultProfile; + + use super::*; + + fn run_seed_12_with_snapshot_fault( + configure: impl FnOnce(&mut SnapshotFaultConfig), + ) -> RelationalDbCommitlogOutcome { + let seed = DstSeed(12); + let config = 
RunConfig::with_max_interactions(100).with_commitlog_fault_profile(CommitlogFaultProfile::Off); + let mut snapshot_fault_config = SnapshotFaultConfig::for_profile(CommitlogFaultProfile::Off); + snapshot_fault_config.enabled = true; + configure(&mut snapshot_fault_config); + let mut runtime = sim::Runtime::new(seed).unwrap(); + + runtime + .block_on(async move { + let (source, engine, properties) = build_with_fault_configs( + seed, + TableScenarioId::RandomCrud, + &config, + CommitlogFaultConfig::for_profile(CommitlogFaultProfile::Off), + snapshot_fault_config, + )?; + core::run_streaming(source, engine, properties, config).await + }) + .unwrap() + } + + #[test] + fn seed_12_exercises_snapshot_capture_and_restore() { + let seed = DstSeed(12); + let config = RunConfig::with_max_interactions(100).with_commitlog_fault_profile(CommitlogFaultProfile::Off); + let mut runtime = sim::Runtime::new(seed).unwrap(); + + let outcome = runtime + .block_on(run_generated_with_config_and_scenario( + seed, + TableScenarioId::RandomCrud, + config, + )) + .unwrap(); + + assert_eq!(outcome.interactions.snapshot_requested, 2); + assert_eq!(outcome.interactions.snapshot_created, 2); + assert_eq!(outcome.interactions.close_reopen_applied, 1); + assert!(outcome.replay.durable_offset.is_some()); + assert!(outcome.replay.restored_snapshot_offset.is_some()); + assert!(outcome.replay.restored_snapshot_offset <= outcome.replay.durable_offset); + } + + #[test] + fn targeted_snapshot_open_faults_are_skipped_and_replay_matches_model() { + let outcome = run_seed_12_with_snapshot_fault(|config| config.open_error_prob = 1.0); + + assert_eq!(outcome.interactions.snapshot_requested, 2); + assert_eq!(outcome.interactions.snapshot_created, 0); + assert_eq!(outcome.interactions.snapshot_skipped, 2); + assert!(outcome.snapshot_faults.open_error > 0); + assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); + } + + #[test] + fn targeted_snapshot_metadata_faults_are_retryable_on_reopen() { + let 
outcome = run_seed_12_with_snapshot_fault(|config| config.metadata_error_prob = 1.0); + + assert_eq!(outcome.interactions.close_reopen_applied, 1); + assert!(outcome.snapshot_faults.metadata_error > 0); + assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); + } + + #[test] + fn targeted_snapshot_read_faults_are_retryable_on_reopen() { + let outcome = run_seed_12_with_snapshot_fault(|config| config.read_error_prob = 1.0); + + assert_eq!(outcome.interactions.snapshot_created, 2); + assert!(outcome.snapshot_faults.read_error > 0); + assert!(outcome.replay.restored_snapshot_offset.is_some()); + assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); + } + + #[test] + fn targeted_snapshot_write_faults_do_not_publish_new_snapshots() { + let outcome = run_seed_12_with_snapshot_fault(|config| config.write_error_prob = 1.0); + + assert_eq!(outcome.interactions.snapshot_requested, 2); + assert_eq!(outcome.interactions.snapshot_created, 0); + assert_eq!(outcome.interactions.snapshot_skipped, 2); + assert!(outcome.snapshot_faults.write_error > 0); + assert!(outcome.replay.restored_snapshot_offset.is_none()); + assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); + } + + #[test] + fn targeted_snapshot_fsync_faults_do_not_publish_new_snapshots() { + let outcome = run_seed_12_with_snapshot_fault(|config| config.fsync_error_prob = 1.0); + + assert_eq!(outcome.interactions.snapshot_requested, 2); + assert_eq!(outcome.interactions.snapshot_created, 0); + assert_eq!(outcome.interactions.snapshot_skipped, 2); + assert!(outcome.snapshot_faults.fsync_error > 0); + assert!(outcome.replay.restored_snapshot_offset.is_none()); + assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); + } +} diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs index 8d5d6c584dd..46d84121631 100644 --- a/crates/dst/src/workload/commitlog_ops/generation.rs +++ 
b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -17,6 +17,7 @@ use crate::{ #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub(crate) struct CommitlogWorkloadProfile { pub(crate) close_reopen_pct: usize, + pub(crate) snapshot_pct: usize, pub(crate) create_dynamic_table_pct: usize, pub(crate) migrate_after_create_pct: usize, pub(crate) migrate_dynamic_table_pct: usize, @@ -27,6 +28,7 @@ impl Default for CommitlogWorkloadProfile { fn default() -> Self { Self { close_reopen_pct: 1, + snapshot_pct: 2, create_dynamic_table_pct: 1, migrate_after_create_pct: 55, migrate_dynamic_table_pct: 6, @@ -104,6 +106,10 @@ impl CommitlogWorkloadSource { self.pending.push_back(CommitlogInteraction::CloseReopen); } + if Percent::new(self.profile.snapshot_pct).sample(&mut self.rng) { + self.pending.push_back(CommitlogInteraction::TakeSnapshot); + } + if Percent::new(self.profile.create_dynamic_table_pct).sample(&mut self.rng) { let conn = ConnectionChoice { connection_count: self.num_connections, @@ -250,6 +256,7 @@ mod tests { let schema = scenario.generate_schema(&mut rng); let profile = CommitlogWorkloadProfile { close_reopen_pct: 100, + snapshot_pct: 100, create_dynamic_table_pct: 100, migrate_after_create_pct: 100, migrate_dynamic_table_pct: 100, diff --git a/crates/dst/src/workload/commitlog_ops/mod.rs b/crates/dst/src/workload/commitlog_ops/mod.rs index e08647e7a6f..62d0f99a82a 100644 --- a/crates/dst/src/workload/commitlog_ops/mod.rs +++ b/crates/dst/src/workload/commitlog_ops/mod.rs @@ -6,5 +6,6 @@ mod types; pub(crate) use generation::CommitlogWorkloadSource; pub use types::{ CommitlogInteraction, CommitlogWorkloadOutcome, DiskFaultSummary, DurableReplaySummary, InteractionSummary, - RuntimeSummary, SchemaSummary, TableOperationSummary, TransactionSummary, + RuntimeSummary, SchemaSummary, SnapshotCaptureStatus, SnapshotObservation, TableOperationSummary, + TransactionSummary, }; diff --git a/crates/dst/src/workload/commitlog_ops/types.rs 
b/crates/dst/src/workload/commitlog_ops/types.rs index 78382fb6372..0c0591f102f 100644 --- a/crates/dst/src/workload/commitlog_ops/types.rs +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -18,6 +18,8 @@ pub enum CommitlogInteraction { DropDynamicTable { conn: SessionId, slot: u32 }, /// Migrate dynamic table schema for a slot. MigrateDynamicTable { conn: SessionId, slot: u32 }, + /// Capture a durable snapshot of the current database state. + TakeSnapshot, /// Close and restart the database from durable history. CloseReopen, } @@ -34,6 +36,7 @@ pub struct CommitlogWorkloadOutcome { pub transactions: TransactionSummary, pub runtime: RuntimeSummary, pub disk_faults: DiskFaultSummary, + pub snapshot_faults: DiskFaultSummary, pub replay: DurableReplaySummary, pub table: TableWorkloadOutcome, } @@ -42,10 +45,30 @@ pub struct CommitlogWorkloadOutcome { #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct DurableReplaySummary { pub durable_offset: Option, + pub restored_snapshot_offset: Option, + pub latest_snapshot_offset: Option, pub base_rows: Vec>, pub dynamic_table_count: usize, } +/// Snapshot capture status observed by a target. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum SnapshotCaptureStatus { + Captured { offset: u64 }, + SkippedOpenTransaction, + SkippedNoSnapshotCreated, + SkippedInjectedFault, +} + +/// Snapshot capture facts exposed to properties. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SnapshotObservation { + pub durable_offset: Option, + pub latest_before: Option, + pub latest_after: Option, + pub status: SnapshotCaptureStatus, +} + #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct SchemaSummary { pub initial_tables: usize, @@ -64,6 +87,9 @@ pub struct InteractionSummary { pub close_reopen_requested: usize, pub close_reopen_applied: usize, pub close_reopen_skipped: usize, + pub snapshot_requested: usize, + pub snapshot_created: usize, + pub snapshot_skipped: usize, pub skipped: usize, } diff --git a/crates/dst/src/workload/strategy.rs b/crates/dst/src/workload/strategy.rs index f3acbd2d19d..94108eced8c 100644 --- a/crates/dst/src/workload/strategy.rs +++ b/crates/dst/src/workload/strategy.rs @@ -3,43 +3,11 @@ //! This is intentionally minimal: we keep DST's streaming execution model and //! use strategies only for typed, composable input generation. -use std::marker::PhantomData; - use crate::seed::DstRng; /// Typed strategy that can sample values from the shared deterministic RNG. pub(crate) trait Strategy: Sized { fn sample(&self, rng: &mut DstRng) -> T; - - #[allow(dead_code)] - fn map(self, f: F) -> Map - where - F: Fn(T) -> U, - { - Map { - inner: self, - f, - _marker: PhantomData, - } - } -} - -/// `map` combinator for strategies. -#[allow(dead_code)] -pub(crate) struct Map { - inner: S, - f: F, - _marker: PhantomData T>, -} - -impl Strategy for Map -where - S: Strategy, - F: Fn(T) -> U, -{ - fn sample(&self, rng: &mut DstRng) -> U { - (self.f)(self.inner.sample(rng)) - } } /// Picks a value in `[0, upper)`. 
@@ -127,14 +95,6 @@ mod tests { assert_eq!(a, b); } - #[test] - fn map_combinator_works() { - let strategy = Percent::new(30).map(|picked| if picked { 1 } else { 0 }); - let mut rng = DstSeed(99).rng(); - let values = (0..8).map(|_| strategy.sample(&mut rng)).collect::>(); - assert!(values.iter().all(|v| *v == 0 || *v == 1)); - } - #[test] fn index_strategy_respects_bounds() { let mut rng = DstSeed(123).rng(); diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs index 0cb699dbc24..e0b6ef3eecf 100644 --- a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -314,13 +314,13 @@ fn emit_write_conflict(planner: &mut ScenarioPlanner<'_>, owner: SessionId) -> b } let conn = candidates[planner.choose_index(candidates.len())]; if planner.roll_percent(50) { - planner.push_interaction(TableWorkloadInteraction::begin_tx_conflict(owner, conn)); + planner.push_interaction(TableWorkloadInteraction::begin_tx_conflict(conn)); return true; } let table = planner.choose_table(); let row = planner.make_row(table); - planner.push_interaction(TableWorkloadInteraction::write_conflict_insert(owner, conn, table, row)); + planner.push_interaction(TableWorkloadInteraction::write_conflict_insert(conn, table, row)); true } diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index 9302daec70a..96947a509bc 100644 --- a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -152,19 +152,12 @@ impl PlannedInteraction { ) } - pub fn begin_tx_conflict(_owner: SessionId, conn: SessionId) -> Self { + pub fn begin_tx_conflict(conn: SessionId) -> Self { Self::new(TableOperation::BeginTx { conn }, TableInteractionCase::BeginTxConflict) } - pub fn write_conflict_insert(_owner: SessionId, conn: SessionId, table: usize, row: SimRow) -> Self { - Self::new( - 
TableOperation::InsertRows { - conn, - table, - rows: vec![row], - }, - TableInteractionCase::WriteConflictInsert, - ) + pub fn write_conflict_insert(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::insert_rows(conn, table, vec![row], TableInteractionCase::WriteConflictInsert) } pub fn insert(conn: SessionId, table: usize, row: SimRow) -> Self { @@ -172,14 +165,7 @@ impl PlannedInteraction { } pub fn insert_with_case(conn: SessionId, table: usize, row: SimRow, case: TableInteractionCase) -> Self { - Self::new( - TableOperation::InsertRows { - conn, - table, - rows: vec![row], - }, - case, - ) + Self::insert_rows(conn, table, vec![row], case) } pub fn delete(conn: SessionId, table: usize, row: SimRow) -> Self { @@ -187,14 +173,7 @@ impl PlannedInteraction { } pub fn delete_with_case(conn: SessionId, table: usize, row: SimRow, case: TableInteractionCase) -> Self { - Self::new( - TableOperation::DeleteRows { - conn, - table, - rows: vec![row], - }, - case, - ) + Self::delete_rows(conn, table, vec![row], case) } pub fn exact_duplicate_insert(conn: SessionId, table: usize, row: SimRow) -> Self { @@ -210,17 +189,19 @@ impl PlannedInteraction { } pub fn batch_insert(conn: SessionId, table: usize, rows: Vec) -> Self { - Self::new( - TableOperation::InsertRows { conn, table, rows }, - TableInteractionCase::BatchInsert, - ) + Self::insert_rows(conn, table, rows, TableInteractionCase::BatchInsert) } pub fn batch_delete(conn: SessionId, table: usize, rows: Vec) -> Self { - Self::new( - TableOperation::DeleteRows { conn, table, rows }, - TableInteractionCase::BatchDelete, - ) + Self::delete_rows(conn, table, rows, TableInteractionCase::BatchDelete) + } + + fn insert_rows(conn: SessionId, table: usize, rows: Vec, case: TableInteractionCase) -> Self { + Self::new(TableOperation::InsertRows { conn, table, rows }, case) + } + + fn delete_rows(conn: SessionId, table: usize, rows: Vec, case: TableInteractionCase) -> Self { + Self::new(TableOperation::DeleteRows { conn, 
table, rows }, case) } pub fn add_column(conn: SessionId, table: usize, column: ColumnPlan, default: AlgebraicValue) -> Self { diff --git a/crates/snapshot/tests/remote.rs b/crates/snapshot/tests/remote.rs index 41097b33abd..9cd4b5e56e5 100644 --- a/crates/snapshot/tests/remote.rs +++ b/crates/snapshot/tests/remote.rs @@ -234,6 +234,7 @@ async fn create_snapshot(repo: Arc) -> anyhow::Result::default())), + snapshot_repo: None, snapshots: Some(SnapshotWorker::new(repo, snapshot::Compression::Disabled)), runtime: rt, }; From d2b5eedb9d57c3909391fe8d7f1d528155ac1d1a Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 6 May 2026 17:38:41 +0530 Subject: [PATCH 31/74] make dst snapshot in-memory --- Cargo.toml | 2 +- crates/core/build.rs | 10 - crates/core/src/db/persistence.rs | 22 +- crates/core/src/db/relational_db.rs | 67 ++-- crates/core/src/db/snapshot.rs | 7 +- .../subscription/module_subscription_actor.rs | 2 +- .../src/locking_tx_datastore/datastore.rs | 32 +- crates/dst/src/sim/snapshot.rs | 57 ++- .../src/targets/relational_db_commitlog.rs | 4 +- crates/snapshot/src/lib.rs | 334 +++++++++++++++++- crates/snapshot/tests/remote.rs | 2 +- crates/standalone/build.rs | 10 - run_dst.sh | 10 - 13 files changed, 445 insertions(+), 114 deletions(-) delete mode 100644 crates/core/build.rs delete mode 100644 crates/standalone/build.rs delete mode 100755 run_dst.sh diff --git a/Cargo.toml b/Cargo.toml index d1b1efdaa4e..5b1027ad73c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -392,7 +392,7 @@ features = [ ] [workspace.lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)', 'cfg(madsim)', 'cfg(simulation)'] } +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)', 'cfg(simulation)'] } [workspace.lints.clippy] # FIXME: we should work on this lint incrementally diff --git a/crates/core/build.rs b/crates/core/build.rs deleted file mode 100644 index 3982c077afc..00000000000 --- a/crates/core/build.rs +++ /dev/null @@ -1,10 +0,0 
@@ -fn main() { - println!("cargo:rerun-if-env-changed=CARGO_CFG_MADSIM"); - println!("cargo:rerun-if-env-changed=CARGO_CFG_SIMULATION"); - println!("cargo:rerun-if-env-changed=CARGO_ENCODED_RUSTFLAGS"); - println!("cargo:rerun-if-env-changed=RUSTFLAGS"); - - if std::env::var_os("CARGO_CFG_MADSIM").is_some() { - println!("cargo:rustc-cfg=simulation"); - } -} diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index a2f6711d98f..8ee55a00b72 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -4,7 +4,7 @@ use async_trait::async_trait; use spacetimedb_commitlog::SizeOnDisk; use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; -use spacetimedb_snapshot::SnapshotRepository; +use spacetimedb_snapshot::SnapshotStore; use crate::{messages::control_db::Database, runtime::RuntimeDispatch, util::asyncify}; @@ -35,12 +35,12 @@ pub struct Persistence { /// Currently the expectation is that the reported size is the commitlog /// size only. pub disk_size: DiskSizeFn, - /// An optional [SnapshotRepository] used when restoring from snapshots. + /// Optional snapshot storage used when restoring from snapshots. /// /// This is separate from [SnapshotWorker] so deterministic simulation /// targets can use synchronous snapshot creation without starting the /// Tokio-backed worker. - pub snapshot_repo: Option>, + pub snapshot_store: Option>, /// An optional [SnapshotWorker]. /// /// The current expectation is that snapshots are only enabled for @@ -71,17 +71,17 @@ impl Persistence { Self { durability: Arc::new(durability), disk_size: Arc::new(disk_size), - snapshot_repo: None, + snapshot_store: None, snapshots, runtime, } } - /// If snapshots are enabled, get the [SnapshotRepository] they are stored in. 
- pub fn snapshot_repo(&self) -> Option<&SnapshotRepository> { - self.snapshot_repo - .as_deref() - .or_else(|| self.snapshots.as_ref().map(|worker| worker.repo())) + /// If snapshots are enabled, get the snapshot storage they are stored in. + pub fn snapshot_store(&self) -> Option> { + self.snapshot_store + .clone() + .or_else(|| self.snapshots.as_ref().map(|worker| worker.snapshot_store())) } /// Get the [TxOffset] reported as durable by the [Durability] impl. @@ -115,7 +115,7 @@ impl Persistence { |Self { durability, disk_size, - snapshot_repo: _, + snapshot_store: _, snapshots, runtime, }| (Some(durability), Some(disk_size), snapshots, Some(runtime)), @@ -180,7 +180,7 @@ impl PersistenceProvider for LocalPersistenceProvider { Ok(Persistence { durability, disk_size, - snapshot_repo: None, + snapshot_store: None, snapshots: Some(snapshot_worker), runtime: RuntimeDispatch::tokio_current(), }) diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 43bc70a63c6..ebf92bab992 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -52,7 +52,7 @@ use spacetimedb_schema::schema::{ ColumnSchema, IndexSchema, RowLevelSecuritySchema, Schema, SequenceSchema, TableSchema, }; use spacetimedb_schema::table_name::TableName; -use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotError, SnapshotRepository}; +use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotError, SnapshotRepository, SnapshotStore}; use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::page_pool::PagePool; use spacetimedb_table::table::{RowRef, TableScanIter}; @@ -237,11 +237,11 @@ impl RelationalDB { /// /// `None` may be passed to obtain an in-memory only database. /// - /// - `snapshot_repo` + /// - snapshot storage /// - /// The [`SnapshotRepository`] which stores snapshots of this database. + /// The [`SnapshotStore`] which stores snapshots of this database. 
/// This is only meaningful if `history` and `durability` are also supplied. - /// If restoring from an existing database, the `snapshot_repo` must + /// If restoring from an existing database, the snapshot store must /// store views of the same sequence of TXes as the `history`. /// /// - `metrics_recorder_queue` @@ -282,9 +282,10 @@ impl RelationalDB { let start_time = std::time::Instant::now(); + let snapshot_store = persistence.as_ref().and_then(|p| p.snapshot_store()); let inner = Self::restore_from_snapshot_or_bootstrap( database_identity, - persistence.as_ref().and_then(|p| p.snapshot_repo()), + snapshot_store.as_deref(), durable_tx_offset, min_commitlog_offset, page_pool, @@ -293,10 +294,10 @@ impl RelationalDB { // Sanity check because the snapshot worker could've been used before. debug_assert!( persistence - .snapshot_repo() - .map(|repo| repo.database_identity() == database_identity) + .snapshot_store() + .map(|store| store.database_identity() == database_identity) .unwrap_or(true), - "snapshot repository does not match database identity", + "snapshot store does not match database identity", ); persistence.set_snapshot_state(inner.committed_state.clone()); } @@ -475,7 +476,7 @@ impl RelationalDB { fn restore_from_snapshot_or_bootstrap( database_identity: Identity, - snapshot_repo: Option<&SnapshotRepository>, + snapshot_store: Option<&dyn SnapshotStore>, durable_tx_offset: Option, min_commitlog_offset: TxOffset, page_pool: PagePool, @@ -483,14 +484,14 @@ impl RelationalDB { // Try to load the `ReconstructedSnapshot` at `snapshot_offset`. 
fn try_load_snapshot( database_identity: &Identity, - snapshot_repo: &SnapshotRepository, + snapshot_store: &dyn SnapshotStore, snapshot_offset: TxOffset, page_pool: &PagePool, ) -> Result> { log::info!("[{database_identity}] DATABASE: restoring snapshot of tx_offset {snapshot_offset}"); let start = std::time::Instant::now(); - let snapshot = snapshot_repo + let snapshot = snapshot_store .read_snapshot(snapshot_offset, page_pool) .map_err(Box::new)?; @@ -556,11 +557,11 @@ impl RelationalDB { } } - if let Some((snapshot_repo, durable_tx_offset)) = snapshot_repo.zip(durable_tx_offset) { + if let Some((snapshot_store, durable_tx_offset)) = snapshot_store.zip(durable_tx_offset) { // Mark any newer snapshots as invalid, as the history past // `durable_tx_offset` may have been reset and thus diverge from // any snapshots taken earlier. - snapshot_repo + snapshot_store .invalidate_newer_snapshots(durable_tx_offset) .map_err(|e| RestoreSnapshotError::Invalidate { offset: durable_tx_offset, @@ -571,7 +572,7 @@ impl RelationalDB { // range `(min_commitlog_offset + 1)..=durable_tx_offset`. let mut upper_bound = durable_tx_offset; loop { - let Some(snapshot_offset) = snapshot_repo + let Some(snapshot_offset) = snapshot_store .latest_snapshot_older_than(upper_bound) .map_err(Box::new)? else { @@ -581,7 +582,7 @@ impl RelationalDB { log::debug!("snapshot_offset={snapshot_offset} min_commitlog_offset={min_commitlog_offset}"); break; } - match try_load_snapshot(&database_identity, snapshot_repo, snapshot_offset, &page_pool) { + match try_load_snapshot(&database_identity, snapshot_store, snapshot_offset, &page_pool) { Ok(snapshot) if snapshot.database_identity != database_identity => { return Err(RestoreSnapshotError::IdentityMismatch { expected: database_identity, @@ -596,11 +597,12 @@ impl RelationalDB { // Invalidate the snapshot if the error is permanent. // Newly created snapshots should not depend on it. 
if !is_transient_error(&e) { - let path = snapshot_repo.snapshot_dir_path(snapshot_offset); - log::info!("invalidating bad snapshot at {}", path.display()); - path.rename_invalid().map_err(|e| RestoreSnapshotError::Invalidate { - offset: snapshot_offset, - source: Box::new(e.into()), + log::info!("invalidating bad snapshot at {snapshot_offset}"); + snapshot_store.invalidate_snapshot(snapshot_offset).map_err(|e| { + RestoreSnapshotError::Invalidate { + offset: snapshot_offset, + source: Box::new(e), + } })?; } // Try the next older one if the error was transient. @@ -616,7 +618,7 @@ impl RelationalDB { } } } - log::info!("[{database_identity}] DATABASE: no usable snapshot on disk"); + log::info!("[{database_identity}] DATABASE: no usable snapshot in store"); // If we didn't find a snapshot and the commitlog doesn't start at the // zero-th commit (e.g. due to archiving), there is no way to restore @@ -903,6 +905,14 @@ impl RelationalDB { Ok(self.inner.take_snapshot(repo)?) } + /// Capture a snapshot into a repository abstraction. + /// + /// This is used by simulator-backed tests which need controlled storage + /// instead of a filesystem path. + pub fn take_snapshot_store(&self, store: &dyn SnapshotStore) -> Result, DBError> { + Ok(self.inner.take_snapshot_store(store)?) + } + /// Run a fallible function in a transaction. 
/// /// If the supplied function returns `Ok`, the transaction is automatically @@ -1959,7 +1969,7 @@ pub mod tests_utils { let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, - snapshot_repo: None, + snapshot_store: None, snapshots, runtime: RuntimeDispatch::tokio(rt), }; @@ -2081,7 +2091,7 @@ pub mod tests_utils { let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, - snapshot_repo: None, + snapshot_store: None, snapshots, runtime: RuntimeDispatch::tokio(rt), }; @@ -3687,7 +3697,7 @@ mod tests { let repo = open_snapshot_repo(dir, Identity::ZERO, 0)?; RelationalDB::restore_from_snapshot_or_bootstrap( Identity::ZERO, - Some(&repo), + Some(repo.as_ref()), Some(last_compress), 0, PagePool::new_for_test(), @@ -3715,8 +3725,13 @@ mod tests { ); let last = repo.latest_snapshot()?; - let stdb = - RelationalDB::restore_from_snapshot_or_bootstrap(identity, Some(&repo), last, 0, PagePool::new_for_test())?; + let stdb = RelationalDB::restore_from_snapshot_or_bootstrap( + identity, + Some(repo.as_ref()), + last, + 0, + PagePool::new_for_test(), + )?; let out = TempDir::with_prefix("snapshot_test")?; let dir = SnapshotsPath::from_path_unchecked(out.path()); diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index c47e1d33d2d..bae4a7bf980 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -14,7 +14,7 @@ use prometheus::{Histogram, IntGauge}; use spacetimedb_datastore::locking_tx_datastore::{committed_state::CommittedState, datastore::Locking}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use spacetimedb_snapshot::{CompressionStats, SnapshotRepository}; +use spacetimedb_snapshot::{CompressionStats, SnapshotRepository, SnapshotStore}; use tokio::sync::watch; use crate::{util::asyncify, worker_metrics::WORKER_METRICS}; @@ -110,6 +110,11 @@ impl SnapshotWorker { &self.snapshot_repository } + /// Get this worker's snapshot storage as a 
generic snapshot store. + pub fn snapshot_store(&self) -> Arc { + self.snapshot_repository.clone() + } + /// Request a snapshot to be taken. /// /// The snapshot will be taken at some point in the future. diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index 7be46b25f13..80757b46242 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2045,7 +2045,7 @@ mod tests { Some(Persistence { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), - snapshot_repo: None, + snapshot_store: None, snapshots: None, runtime: crate::runtime::RuntimeDispatch::tokio(rt), }), diff --git a/crates/datastore/src/locking_tx_datastore/datastore.rs b/crates/datastore/src/locking_tx_datastore/datastore.rs index 0c5e7655d43..13057e18a37 100644 --- a/crates/datastore/src/locking_tx_datastore/datastore.rs +++ b/crates/datastore/src/locking_tx_datastore/datastore.rs @@ -39,7 +39,7 @@ use spacetimedb_schema::{ reducer_name::ReducerName, schema::{ColumnSchema, IndexSchema, SequenceSchema, TableSchema}, }; -use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotRepository, UnflushedSnapshot}; +use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotRepository, SnapshotStore, UnflushedSnapshot}; use spacetimedb_table::{ indexes::RowPointer, page_pool::PagePool, @@ -234,6 +234,14 @@ impl Locking { .map_err(Into::into) } + /// Take a snapshot through a repository abstraction. + /// + /// Unlike [`Self::take_snapshot`], this does not expose filesystem paths and + /// can therefore be backed by in-memory simulator storage. 
+ pub fn take_snapshot_store(&self, store: &dyn SnapshotStore) -> Result> { + Self::take_snapshot_store_internal(&self.committed_state, store) + } + pub fn assert_system_tables_match(&self) -> Result<()> { let committed_state = self.committed_state.read_arc(); committed_state.assert_system_table_schemas_match() @@ -260,6 +268,28 @@ impl Locking { Ok(Some((tx_offset, unflushed_snapshot))) } + pub fn take_snapshot_store_internal( + committed_state: &RwLock, + store: &dyn SnapshotStore, + ) -> Result> { + let mut committed_state = committed_state.write(); + let Some(tx_offset) = committed_state.next_tx_offset.checked_sub(1) else { + return Ok(None); + }; + + log::info!( + "Capturing snapshot of database {:?} at TX offset {}", + store.database_identity(), + tx_offset, + ); + + let (mut tables, blob_store) = committed_state.persistent_tables_and_blob_store(); + store + .capture_snapshot(&mut tables, blob_store, tx_offset) + .map(Some) + .map_err(Into::into) + } + /// Returns a list over all the currently connected clients, /// reading from the `st_clients` system table. pub fn connected_clients<'a>( diff --git a/crates/dst/src/sim/snapshot.rs b/crates/dst/src/sim/snapshot.rs index a09d66ac96a..0d851c62259 100644 --- a/crates/dst/src/sim/snapshot.rs +++ b/crates/dst/src/sim/snapshot.rs @@ -1,17 +1,15 @@ -//! Production snapshot storage with deterministic fault injection. +//! In-memory snapshot storage with deterministic fault injection. //! -//! This is intentionally a semantic snapshot seam, not a filesystem facade. -//! Targets can use it to model snapshot lifecycle behavior while still writing -//! and reading real `SnapshotRepository` data. +//! This is intentionally a semantic snapshot seam, not a filesystem facade. It +//! keeps DST snapshot bytes inside controlled memory storage, while still using +//! the same snapshot capture/restore shape as production. 
use std::sync::Arc; -use spacetimedb_core::db::relational_db::{open_snapshot_repo, RelationalDB}; +use spacetimedb_core::db::relational_db::RelationalDB; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use spacetimedb_paths::{server::SnapshotsPath, FromPathUnchecked}; -use spacetimedb_snapshot::SnapshotRepository; -use tempfile::TempDir; +use spacetimedb_snapshot::{MemorySnapshotRepository, SnapshotStore}; use crate::{ seed::DstSeed, @@ -29,39 +27,30 @@ pub(crate) fn is_injected_snapshot_error_text(text: &str) -> bool { } pub(crate) struct SnapshotRestoreRepo { - pub(crate) repo: Option>, + pub(crate) store: Option>, pub(crate) restored_snapshot_offset: Option, pub(crate) latest_snapshot_offset: Option, } -/// Real snapshot repository wrapped with deterministic operation-level faults. +/// In-memory snapshot repository wrapped with deterministic operation-level faults. /// /// The bytes/pages are written and read by `spacetimedb-snapshot`; this wrapper /// only decides whether a DST operation reaches that repository. That keeps -/// restore semantics aligned with production without requiring the Tokio-backed -/// `SnapshotWorker` inside the simulator. +/// restore semantics aligned with production without requiring the +/// Tokio-backed `SnapshotWorker` or the host filesystem inside the simulator. /// /// This is the intended boundary for the current DST target. It exercises /// capture/restore behavior, retry classification, and replay correctness. It -/// does not model torn snapshot pages or byte-level corruption; those require a -/// deeper repository abstraction inside `spacetimedb-snapshot`. +/// does not model torn snapshot pages or byte-level corruption. 
pub(crate) struct BuggifiedSnapshotRepo { - _root: TempDir, - repo: Arc, + repo: Arc, faults: StorageFaultController, } impl BuggifiedSnapshotRepo { pub(crate) fn new(config: SnapshotFaultConfig, seed: DstSeed) -> anyhow::Result { - let root = tempfile::Builder::new() - .prefix("spacetimedb-dst-snapshots-") - .tempdir()?; - let path = SnapshotsPath::from_path_unchecked(root.path()); - let repo = open_snapshot_repo(path, Identity::ZERO, 0) - .map_err(|err| anyhow::anyhow!("open DST snapshot repo failed: {err}"))?; Ok(Self { - _root: root, - repo, + repo: Arc::new(MemorySnapshotRepository::new(Identity::ZERO, 0)), faults: StorageFaultController::new(config, StorageFaultDomain::Snapshot, seed), }) } @@ -94,15 +83,9 @@ impl BuggifiedSnapshotRepo { self.inject(StorageFaultKind::Fsync)?; let created = db - .take_snapshot(&self.repo) + .take_snapshot_store(self.repo.as_ref()) .map_err(|err| format!("snapshot capture failed: {err}"))?; - if created.is_none() { - return Ok(None); - } - - self.repo - .latest_snapshot() - .map_err(|err| format!("snapshot metadata after capture failed: {err}")) + Ok(created) } pub(crate) fn repo_for_restore(&self, durable_offset: Option) -> Result { @@ -111,7 +94,7 @@ impl BuggifiedSnapshotRepo { self.inject(StorageFaultKind::Metadata)?; let Some(durable_offset) = durable_offset else { return Ok(SnapshotRestoreRepo { - repo: None, + store: None, restored_snapshot_offset: None, latest_snapshot_offset, }); @@ -122,7 +105,7 @@ impl BuggifiedSnapshotRepo { .map_err(|err| format!("snapshot metadata before restore failed: {err}"))?; if restored_snapshot_offset.is_none() { return Ok(SnapshotRestoreRepo { - repo: None, + store: None, restored_snapshot_offset, latest_snapshot_offset, }); @@ -131,7 +114,7 @@ impl BuggifiedSnapshotRepo { self.inject(StorageFaultKind::Open)?; self.inject(StorageFaultKind::Read)?; Ok(SnapshotRestoreRepo { - repo: Some(self.repo.clone()), + store: Some(self.repo.clone()), restored_snapshot_offset, latest_snapshot_offset, }) 
@@ -164,7 +147,7 @@ mod tests { fn repo_without_snapshots_is_not_used_for_restore() { let repo = BuggifiedSnapshotRepo::new(no_faults(), DstSeed(41)).unwrap(); - assert!(repo.repo_for_restore(Some(0)).unwrap().repo.is_none()); + assert!(repo.repo_for_restore(Some(0)).unwrap().store.is_none()); } #[test] @@ -188,7 +171,7 @@ mod tests { let restore = repo.with_faults_suspended(|| repo.repo_for_restore(Some(0))); - assert!(restore.unwrap().repo.is_none()); + assert!(restore.unwrap().store.is_none()); assert_eq!(repo.fault_summary().metadata_error, 0); } } diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 94d7a71f722..750f955e45d 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -484,7 +484,7 @@ impl RelationalDbEngine { let persistence = Persistence { durability: durability.clone(), disk_size: Arc::new(in_memory_size_on_disk), - snapshot_repo: snapshot_restore.repo, + snapshot_store: snapshot_restore.store, snapshots: None, runtime: spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), }; @@ -1534,7 +1534,7 @@ fn bootstrap_relational_db( let persistence = Persistence { durability: durability.clone(), disk_size: Arc::new(in_memory_size_on_disk), - snapshot_repo: None, + snapshot_store: None, snapshots: None, runtime: spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), }; diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index 66c25ed824a..ade599853ff 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -48,6 +48,7 @@ use std::fs::{self, File}; use std::io; use std::ops::RangeBounds; use std::path::Path; +use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use std::time::{Duration, Instant}; use std::{ collections::BTreeMap, @@ -1139,13 +1140,19 @@ impl SnapshotRepository { .collect::>(); for newer_snapshot in newer_snapshots { - let path = 
self.snapshot_dir_path(newer_snapshot); - log::info!("Renaming snapshot newer than {upper_bound} from {path:?} to {path:?}"); - path.rename_invalid()?; + self.invalidate_snapshot(newer_snapshot)?; } Ok(()) } + /// Mark a single snapshot invalid so it will not be considered for future + /// restores. + pub fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + let path = self.snapshot_dir_path(tx_offset); + log::info!("Renaming snapshot {tx_offset} from {path:?} to invalid"); + path.rename_invalid().map_err(Into::into) + } + /// Compress the `current` snapshot, unless it is already compressed. /// /// If a `parent` snapshot is given, its object repo will be used to @@ -1329,6 +1336,327 @@ impl SnapshotRepository { } } +/// Snapshot storage backend. +/// +/// Production uses the filesystem-backed [`SnapshotRepository`]. DST can use +/// [`MemorySnapshotRepository`] to keep snapshot storage inside the simulator +/// boundary instead of depending on temporary directories or host filesystem +/// behavior. 
+pub trait SnapshotStore: Send + Sync { + fn database_identity(&self) -> Identity; + + fn capture_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result; + + fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result; + + fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError>; + + fn latest_snapshot(&self) -> Result, SnapshotError> { + self.latest_snapshot_older_than(TxOffset::MAX) + } + + fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError>; + + fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError>; +} + +impl SnapshotStore for SnapshotRepository { + fn database_identity(&self) -> Identity { + SnapshotRepository::database_identity(self) + } + + fn capture_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + self.create_snapshot(tables, blobs, tx_offset)?.sync_all()?; + Ok(tx_offset) + } + + fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result { + SnapshotRepository::read_snapshot(self, tx_offset, page_pool) + } + + fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError> { + SnapshotRepository::latest_snapshot_older_than(self, upper_bound) + } + + fn latest_snapshot(&self) -> Result, SnapshotError> { + SnapshotRepository::latest_snapshot(self) + } + + fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { + SnapshotRepository::invalidate_newer_snapshots(self, upper_bound) + } + + fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + SnapshotRepository::invalidate_snapshot(self, tx_offset) + } +} + +/// In-memory snapshot repository for deterministic tests. 
+/// +/// This stores snapshot object bytes in process memory and reconstructs through +/// the same [`ReconstructedSnapshot`] shape as the filesystem repository. It is +/// not durable and intentionally does not model the on-disk two-phase flush +/// protocol; it is a simulator/test backend for semantic snapshot capture and +/// restore. +pub struct MemorySnapshotRepository { + database_identity: Identity, + replica_id: u64, + snapshots: RwLock>, +} + +impl MemorySnapshotRepository { + pub fn new(database_identity: Identity, replica_id: u64) -> Self { + Self { + database_identity, + replica_id, + snapshots: RwLock::new(BTreeMap::new()), + } + } + + pub fn database_identity(&self) -> Identity { + self.database_identity + } + + pub fn capture_snapshot<'db>( + &self, + tables: impl Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + self.invalidate_newer_snapshots(tx_offset.saturating_sub(1))?; + let snapshot = MemorySnapshot::capture(self.database_identity, self.replica_id, tables, blobs, tx_offset)?; + self.write_snapshots()?.insert(tx_offset, snapshot); + Ok(tx_offset) + } + + pub fn read_snapshot( + &self, + tx_offset: TxOffset, + page_pool: &PagePool, + ) -> Result { + let snapshot = self + .read_snapshots()? + .get(&tx_offset) + .cloned() + .ok_or_else(|| memory_snapshot_not_found(tx_offset))?; + snapshot.reconstruct(page_pool) + } + + pub fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError> { + Ok(self + .read_snapshots()? 
+ .range(..=upper_bound) + .next_back() + .map(|(&tx_offset, _)| tx_offset)) + } + + pub fn latest_snapshot(&self) -> Result, SnapshotError> { + self.latest_snapshot_older_than(TxOffset::MAX) + } + + pub fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { + self.write_snapshots()?.retain(|tx_offset, _| *tx_offset <= upper_bound); + Ok(()) + } + + pub fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + self.write_snapshots()?.remove(&tx_offset); + Ok(()) + } + + fn read_snapshots(&self) -> Result>, SnapshotError> { + self.snapshots.read().map_err(|_| memory_snapshot_lock_poisoned()) + } + + fn write_snapshots(&self) -> Result>, SnapshotError> { + self.snapshots.write().map_err(|_| memory_snapshot_lock_poisoned()) + } +} + +impl SnapshotStore for MemorySnapshotRepository { + fn database_identity(&self) -> Identity { + MemorySnapshotRepository::database_identity(self) + } + + fn capture_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + MemorySnapshotRepository::capture_snapshot(self, tables, blobs, tx_offset) + } + + fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result { + MemorySnapshotRepository::read_snapshot(self, tx_offset, page_pool) + } + + fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError> { + MemorySnapshotRepository::latest_snapshot_older_than(self, upper_bound) + } + + fn latest_snapshot(&self) -> Result, SnapshotError> { + MemorySnapshotRepository::latest_snapshot(self) + } + + fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { + MemorySnapshotRepository::invalidate_newer_snapshots(self, upper_bound) + } + + fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + MemorySnapshotRepository::invalidate_snapshot(self, tx_offset) + } +} + +#[derive(Clone)] +struct MemorySnapshot { + 
database_identity: Identity, + replica_id: u64, + tx_offset: TxOffset, + module_abi_version: [u16; 2], + blobs: Vec, + tables: BTreeMap>, +} + +impl MemorySnapshot { + fn capture<'db>( + database_identity: Identity, + replica_id: u64, + tables: impl Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + let blobs = blobs + .iter_blobs() + .map(|(hash, uses, bytes)| MemoryBlob { + hash: *hash, + uses: uses as u32, + bytes: bytes.into(), + }) + .collect(); + + let tables = tables + .map(|table| { + let pages = table + .iter_pages_with_hashes() + .map(|(hash, page)| { + let bytes = bsatn::to_vec(page).map_err(|cause| SnapshotError::Serialize { + ty: ObjectType::Page(hash), + cause, + })?; + Ok(MemoryPage { hash, bytes }) + }) + .collect::, SnapshotError>>()?; + Ok((table.schema.table_id, pages)) + }) + .collect::, SnapshotError>>()?; + + Ok(Self { + database_identity, + replica_id, + tx_offset, + module_abi_version: CURRENT_MODULE_ABI_VERSION, + blobs, + tables, + }) + } + + fn reconstruct(self, page_pool: &PagePool) -> Result { + let source_repo = memory_snapshot_path(self.tx_offset); + let mut blob_store = HashMapBlobStore::default(); + for MemoryBlob { hash, uses, bytes } in self.blobs { + let computed = BlobHash::hash_from_bytes(&bytes); + if hash != computed { + return Err(SnapshotError::HashMismatch { + ty: ObjectType::Blob(hash), + expected: hash.data, + computed: computed.data, + source_repo: source_repo.clone(), + }); + } + blob_store.insert_with_uses(&hash, uses as usize, bytes); + } + + let tables = + self.tables + .into_iter() + .map(|(table_id, pages)| { + let pages = pages + .into_iter() + .map(|MemoryPage { hash, bytes }| { + let page = page_pool.take_deserialize_from(&bytes).map_err(|cause| { + SnapshotError::Deserialize { + ty: ObjectType::Page(hash), + source_repo: source_repo.clone(), + cause, + } + })?; + let computed = page.content_hash(); + if hash != computed { + return Err(SnapshotError::HashMismatch { + ty: 
ObjectType::Page(hash), + expected: *hash.as_bytes(), + computed: *computed.as_bytes(), + source_repo: source_repo.clone(), + }); + } + Ok(page) + }) + .collect::, SnapshotError>>()?; + Ok((table_id, pages)) + }) + .collect::, SnapshotError>>()?; + + Ok(ReconstructedSnapshot { + database_identity: self.database_identity, + replica_id: self.replica_id, + tx_offset: self.tx_offset, + module_abi_version: self.module_abi_version, + blob_store, + tables, + compress_type: CompressType::None, + }) + } +} + +#[derive(Clone)] +struct MemoryBlob { + hash: BlobHash, + uses: u32, + bytes: Box<[u8]>, +} + +#[derive(Clone)] +struct MemoryPage { + hash: blake3::Hash, + bytes: Vec, +} + +fn memory_snapshot_lock_poisoned() -> SnapshotError { + SnapshotError::Io(io::Error::other("memory snapshot repository lock poisoned")) +} + +fn memory_snapshot_not_found(tx_offset: TxOffset) -> SnapshotError { + SnapshotError::Io(io::Error::new( + io::ErrorKind::NotFound, + format!("memory snapshot {tx_offset} not found"), + )) +} + +fn memory_snapshot_path(tx_offset: TxOffset) -> PathBuf { + PathBuf::from(format!("")) +} + pub struct ReconstructedSnapshot { /// The identity of the snapshotted database. 
pub database_identity: Identity, diff --git a/crates/snapshot/tests/remote.rs b/crates/snapshot/tests/remote.rs index 9cd4b5e56e5..3d63b23f1fe 100644 --- a/crates/snapshot/tests/remote.rs +++ b/crates/snapshot/tests/remote.rs @@ -234,7 +234,7 @@ async fn create_snapshot(repo: Arc) -> anyhow::Result::default())), - snapshot_repo: None, + snapshot_store: None, snapshots: Some(SnapshotWorker::new(repo, snapshot::Compression::Disabled)), runtime: rt, }; diff --git a/crates/standalone/build.rs b/crates/standalone/build.rs deleted file mode 100644 index 3982c077afc..00000000000 --- a/crates/standalone/build.rs +++ /dev/null @@ -1,10 +0,0 @@ -fn main() { - println!("cargo:rerun-if-env-changed=CARGO_CFG_MADSIM"); - println!("cargo:rerun-if-env-changed=CARGO_CFG_SIMULATION"); - println!("cargo:rerun-if-env-changed=CARGO_ENCODED_RUSTFLAGS"); - println!("cargo:rerun-if-env-changed=RUSTFLAGS"); - - if std::env::var_os("CARGO_CFG_MADSIM").is_some() { - println!("cargo:rustc-cfg=simulation"); - } -} diff --git a/run_dst.sh b/run_dst.sh deleted file mode 100755 index 1a54fec27d4..00000000000 --- a/run_dst.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -set -euo pipefail - -cd "$(dirname "$0")" - -# madsim-tokio and madsim still use cfg(madsim). SpacetimeDB crates derive -# cfg(simulation) from it in build.rs so source gates can stay simulator-provider -# neutral. Passing only --cfg simulation leaves madsim in std/Tokio mode. 
-export RUSTFLAGS="${RUSTFLAGS:+$RUSTFLAGS }--cfg madsim" -exec cargo run -p spacetimedb-dst -- "$@" From 27cb858b1ae7b0d47b0d3959c7989d5acfb798c7 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 7 May 2026 15:26:24 +0530 Subject: [PATCH 32/74] snapshot abstraction at worker --- Cargo.lock | 123 +----------------- crates/core/src/db/persistence.rs | 27 ++-- crates/core/src/db/relational_db.rs | 62 ++++----- crates/core/src/db/snapshot.rs | 106 +++++++++------ .../subscription/module_subscription_actor.rs | 1 - crates/dst/src/properties/runtime.rs | 3 - crates/dst/src/sim/snapshot.rs | 91 ++++++++++--- .../src/targets/relational_db_commitlog.rs | 76 +++-------- .../src/workload/commitlog_ops/generation.rs | 4 - .../dst/src/workload/commitlog_ops/types.rs | 2 - crates/runtime/src/lib.rs | 22 ++++ crates/snapshot/src/lib.rs | 8 +- crates/standalone/Cargo.toml | 2 +- 13 files changed, 233 insertions(+), 294 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 195f5db837b..4908354d0b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1623,38 +1623,14 @@ dependencies = [ "synstructure 0.12.6", ] -[[package]] -name = "darling" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" -dependencies = [ - "darling_core 0.14.4", - "darling_macro 0.14.4", -] - [[package]] name = "darling" version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", -] - -[[package]] -name = "darling_core" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim 0.10.0", - "syn 1.0.109", + "darling_core", + "darling_macro", ] 
[[package]] @@ -1671,24 +1647,13 @@ dependencies = [ "syn 2.0.107", ] -[[package]] -name = "darling_macro" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" -dependencies = [ - "darling_core 0.14.4", - "quote", - "syn 1.0.109", -] - [[package]] name = "darling_macro" version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ - "darling_core 0.21.3", + "darling_core", "quote", "syn 2.0.107", ] @@ -1950,12 +1915,6 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" -[[package]] -name = "downcast-rs" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" - [[package]] name = "dragonbox_ecma" version = "0.1.0" @@ -2106,7 +2065,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f43e744e4ea338060faee68ed933e46e722fb7f3617e722a5772d7e856d8b3ce" dependencies = [ - "darling 0.21.3", + "darling", "proc-macro2", "quote", "syn 2.0.107", @@ -4023,55 +3982,6 @@ dependencies = [ "libc", ] -[[package]] -name = "madsim" -version = "0.2.34" -dependencies = [ - "ahash 0.8.12", - "async-channel", - "async-stream", - "async-task", - "bincode", - "bytes", - "downcast-rs", - "errno", - "futures-util", - "lazy_static", - "libc", - "madsim-macros", - "naive-timer", - "panic-message", - "rand 0.8.5", - "rand_xoshiro", - "rustversion", - "serde", - "spin", - "tokio", - "tokio-util", - "toml 0.9.8", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "madsim-macros" -version = "0.2.12" -dependencies = [ - "darling 0.14.4", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] 
-name = "madsim-tokio" -version = "0.2.30" -dependencies = [ - "madsim", - "spin", - "tokio", -] - [[package]] name = "mappings" version = "0.7.1" @@ -4247,12 +4157,6 @@ dependencies = [ "syn 2.0.107", ] -[[package]] -name = "naive-timer" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "034a0ad7deebf0c2abcf2435950a6666c3c15ea9d8fad0c0f48efa8a7f843fed" - [[package]] name = "names" version = "0.14.0" @@ -5276,12 +5180,6 @@ dependencies = [ "rustc-hash", ] -[[package]] -name = "panic-message" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "384e52fd8fbd4cbe3c317e8216260c21a0f9134de108cea8a4dd4e7e152c472d" - [[package]] name = "papaya" version = "0.2.3" @@ -6242,15 +6140,6 @@ dependencies = [ "rand_core 0.9.3", ] -[[package]] -name = "rand_xoshiro" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" -dependencies = [ - "rand_core 0.6.4", -] - [[package]] name = "rayon" version = "1.11.0" @@ -7481,7 +7370,7 @@ version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7e6c180db0816026a61afa1cff5344fb7ebded7e4d3062772179f2501481c27" dependencies = [ - "darling 0.21.3", + "darling", "proc-macro2", "quote", "syn 2.0.107", @@ -8826,7 +8715,6 @@ dependencies = [ "hostname", "http 1.3.1", "log", - "madsim-tokio", "netstat2", "once_cell", "openssl", @@ -8850,6 +8738,7 @@ dependencies = [ "thiserror 1.0.69", "tikv-jemalloc-ctl", "tikv-jemallocator", + "tokio", "toml 0.8.23", "tower-http 0.5.2", "tracing", diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index 8ee55a00b72..9d2d0cbbdc6 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -4,7 +4,7 @@ use async_trait::async_trait; use spacetimedb_commitlog::SizeOnDisk; use spacetimedb_durability::{DurabilityExited, 
TxOffset}; use spacetimedb_paths::server::ServerDataDir; -use spacetimedb_snapshot::SnapshotStore; +use spacetimedb_snapshot::SnapshotRepo; use crate::{messages::control_db::Database, runtime::RuntimeDispatch, util::asyncify}; @@ -35,12 +35,6 @@ pub struct Persistence { /// Currently the expectation is that the reported size is the commitlog /// size only. pub disk_size: DiskSizeFn, - /// Optional snapshot storage used when restoring from snapshots. - /// - /// This is separate from [SnapshotWorker] so deterministic simulation - /// targets can use synchronous snapshot creation without starting the - /// Tokio-backed worker. - pub snapshot_store: Option>, /// An optional [SnapshotWorker]. /// /// The current expectation is that snapshots are only enabled for @@ -71,17 +65,14 @@ impl Persistence { Self { durability: Arc::new(durability), disk_size: Arc::new(disk_size), - snapshot_store: None, snapshots, runtime, } } - /// If snapshots are enabled, get the snapshot storage they are stored in. - pub fn snapshot_store(&self) -> Option> { - self.snapshot_store - .clone() - .or_else(|| self.snapshots.as_ref().map(|worker| worker.snapshot_store())) + /// If snapshots are enabled, get the snapshot repository they are stored in. + pub fn snapshot_repo(&self) -> Option> { + self.snapshots.as_ref().map(|worker| worker.snapshot_repo()) } /// Get the [TxOffset] reported as durable by the [Durability] impl. 
@@ -115,7 +106,6 @@ impl Persistence { |Self { durability, disk_size, - snapshot_store: _, snapshots, runtime, }| (Some(durability), Some(disk_size), snapshots, Some(runtime)), @@ -167,7 +157,13 @@ impl PersistenceProvider for LocalPersistenceProvider { let snapshot_worker = asyncify(move || relational_db::open_snapshot_repo(snapshot_dir, database_identity, replica_id)) .await - .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Enabled))?; + .map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Enabled, + RuntimeDispatch::tokio_current(), + ) + })?; let (durability, disk_size) = relational_db::local_durability(replica_dir, Some(&snapshot_worker)).await?; tokio::spawn(relational_db::snapshot_watching_commitlog_compressor( @@ -180,7 +176,6 @@ impl PersistenceProvider for LocalPersistenceProvider { Ok(Persistence { durability, disk_size, - snapshot_store: None, snapshots: Some(snapshot_worker), runtime: RuntimeDispatch::tokio_current(), }) diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index ebf92bab992..28f2e1a6c1f 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -52,7 +52,7 @@ use spacetimedb_schema::schema::{ ColumnSchema, IndexSchema, RowLevelSecuritySchema, Schema, SequenceSchema, TableSchema, }; use spacetimedb_schema::table_name::TableName; -use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotError, SnapshotRepository, SnapshotStore}; +use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotError, SnapshotRepo, SnapshotRepository}; use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::page_pool::PagePool; use spacetimedb_table::table::{RowRef, TableScanIter}; @@ -282,10 +282,10 @@ impl RelationalDB { let start_time = std::time::Instant::now(); - let snapshot_store = persistence.as_ref().and_then(|p| p.snapshot_store()); + let snapshot_repo = persistence.as_ref().and_then(|p| p.snapshot_repo()); let inner = 
Self::restore_from_snapshot_or_bootstrap( database_identity, - snapshot_store.as_deref(), + snapshot_repo.as_deref(), durable_tx_offset, min_commitlog_offset, page_pool, @@ -294,10 +294,10 @@ impl RelationalDB { // Sanity check because the snapshot worker could've been used before. debug_assert!( persistence - .snapshot_store() - .map(|store| store.database_identity() == database_identity) + .snapshot_repo() + .map(|repo| repo.database_identity() == database_identity) .unwrap_or(true), - "snapshot store does not match database identity", + "snapshot repo does not match database identity", ); persistence.set_snapshot_state(inner.committed_state.clone()); } @@ -476,7 +476,7 @@ impl RelationalDB { fn restore_from_snapshot_or_bootstrap( database_identity: Identity, - snapshot_store: Option<&dyn SnapshotStore>, + snapshot_repo: Option<&dyn SnapshotRepo>, durable_tx_offset: Option, min_commitlog_offset: TxOffset, page_pool: PagePool, @@ -484,14 +484,14 @@ impl RelationalDB { // Try to load the `ReconstructedSnapshot` at `snapshot_offset`. fn try_load_snapshot( database_identity: &Identity, - snapshot_store: &dyn SnapshotStore, + snapshot_repo: &(impl SnapshotRepo + ?Sized), snapshot_offset: TxOffset, page_pool: &PagePool, ) -> Result> { log::info!("[{database_identity}] DATABASE: restoring snapshot of tx_offset {snapshot_offset}"); let start = std::time::Instant::now(); - let snapshot = snapshot_store + let snapshot = snapshot_repo .read_snapshot(snapshot_offset, page_pool) .map_err(Box::new)?; @@ -557,11 +557,11 @@ impl RelationalDB { } } - if let Some((snapshot_store, durable_tx_offset)) = snapshot_store.zip(durable_tx_offset) { + if let Some((snapshot_repo, durable_tx_offset)) = snapshot_repo.zip(durable_tx_offset) { // Mark any newer snapshots as invalid, as the history past // `durable_tx_offset` may have been reset and thus diverge from // any snapshots taken earlier. 
- snapshot_store + snapshot_repo .invalidate_newer_snapshots(durable_tx_offset) .map_err(|e| RestoreSnapshotError::Invalidate { offset: durable_tx_offset, @@ -572,7 +572,7 @@ impl RelationalDB { // range `(min_commitlog_offset + 1)..=durable_tx_offset`. let mut upper_bound = durable_tx_offset; loop { - let Some(snapshot_offset) = snapshot_store + let Some(snapshot_offset) = snapshot_repo .latest_snapshot_older_than(upper_bound) .map_err(Box::new)? else { @@ -582,7 +582,7 @@ impl RelationalDB { log::debug!("snapshot_offset={snapshot_offset} min_commitlog_offset={min_commitlog_offset}"); break; } - match try_load_snapshot(&database_identity, snapshot_store, snapshot_offset, &page_pool) { + match try_load_snapshot(&database_identity, snapshot_repo, snapshot_offset, &page_pool) { Ok(snapshot) if snapshot.database_identity != database_identity => { return Err(RestoreSnapshotError::IdentityMismatch { expected: database_identity, @@ -598,7 +598,7 @@ impl RelationalDB { // Newly created snapshots should not depend on it. if !is_transient_error(&e) { log::info!("invalidating bad snapshot at {snapshot_offset}"); - snapshot_store.invalidate_snapshot(snapshot_offset).map_err(|e| { + snapshot_repo.invalidate_snapshot(snapshot_offset).map_err(|e| { RestoreSnapshotError::Invalidate { offset: snapshot_offset, source: Box::new(e), @@ -897,22 +897,6 @@ impl RelationalDB { self.snapshot_worker.as_ref().map(|snap| snap.subscribe()) } - /// Capture a snapshot synchronously into `repo`. - /// - /// This is primarily used by deterministic tests which cannot use the - /// Tokio-backed [`SnapshotWorker`]. - pub fn take_snapshot(&self, repo: &SnapshotRepository) -> Result, DBError> { - Ok(self.inner.take_snapshot(repo)?) - } - - /// Capture a snapshot into a repository abstraction. - /// - /// This is used by simulator-backed tests which need controlled storage - /// instead of a filesystem path. 
- pub fn take_snapshot_store(&self, store: &dyn SnapshotStore) -> Result, DBError> { - Ok(self.inner.take_snapshot_store(store)?) - } - /// Run a fallible function in a transaction. /// /// If the supplied function returns `Ok`, the transaction is automatically @@ -1959,7 +1943,13 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), db_identity, replica_id) - .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Disabled)) + .map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Disabled, + RuntimeDispatch::tokio(rt.clone()), + ) + }) }) .transpose()?; @@ -1969,7 +1959,6 @@ pub mod tests_utils { let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, - snapshot_store: None, snapshots, runtime: RuntimeDispatch::tokio(rt), }; @@ -2083,7 +2072,13 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), Identity::ZERO, 0) - .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Disabled)) + .map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Disabled, + RuntimeDispatch::tokio(rt.clone()), + ) + }) }) .transpose()?; let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), snapshots.as_ref()))?; @@ -2091,7 +2086,6 @@ pub mod tests_utils { let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, - snapshot_store: None, snapshots, runtime: RuntimeDispatch::tokio(rt), }; diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index bae4a7bf980..12808cb982d 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -14,10 +14,10 @@ use prometheus::{Histogram, IntGauge}; use spacetimedb_datastore::locking_tx_datastore::{committed_state::CommittedState, datastore::Locking}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use 
spacetimedb_snapshot::{CompressionStats, SnapshotRepository, SnapshotStore}; +use spacetimedb_snapshot::{CompressionStats, SnapshotRepo, SnapshotRepository}; use tokio::sync::watch; -use crate::{util::asyncify, worker_metrics::WORKER_METRICS}; +use crate::{runtime::RuntimeDispatch, worker_metrics::WORKER_METRICS}; pub type SnapshotDatabaseState = Arc>; @@ -60,7 +60,7 @@ impl Compression { pub struct SnapshotWorker { snapshot_created: watch::Sender, request_snapshot: mpsc::UnboundedSender, - snapshot_repository: Arc, + snapshot_repo: Arc, } impl SnapshotWorker { @@ -69,29 +69,26 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. - pub fn new(snapshot_repository: Arc, compression: Compression) -> Self { - let database = snapshot_repository.database_identity(); - let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); + pub fn new(snapshot_repo: Arc, runtime: RuntimeDispatch) -> Self { + let database = snapshot_repo.database_identity(); + let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); let (request_tx, request_rx) = mpsc::unbounded(); let actor = SnapshotWorkerActor { snapshot_requests: request_rx, - snapshot_repo: snapshot_repository.clone(), + snapshot_repo: snapshot_repo.clone(), snapshot_created: snapshot_created.clone(), metrics: SnapshotMetrics::new(database), - compression: compression.is_enabled().then(|| Compressor { - snapshot_repo: snapshot_repository.clone(), - metrics: CompressionMetrics::new(database), - stats: <_>::default(), - }), + runtime: runtime.clone(), + compression: None, }; - tokio::spawn(actor.run()); + runtime.spawn(actor.run()); Self { snapshot_created, request_snapshot: request_tx, - snapshot_repository, + snapshot_repo, } } @@ 
-105,14 +102,9 @@ impl SnapshotWorker { .expect("snapshot worker panicked"); } - /// Get the [SnapshotRepository] this worker is operating on. - pub fn repo(&self) -> &SnapshotRepository { - &self.snapshot_repository - } - - /// Get this worker's snapshot storage as a generic snapshot store. - pub fn snapshot_store(&self) -> Arc { - self.snapshot_repository.clone() + /// Get the snapshot repository this worker is operating on. + pub fn snapshot_repo(&self) -> Arc { + self.snapshot_repo.clone() } /// Request a snapshot to be taken. @@ -146,6 +138,40 @@ impl SnapshotWorker { } } +impl SnapshotWorker { + pub fn new_with_repository( + snapshot_repository: Arc, + compression: Compression, + runtime: RuntimeDispatch, + ) -> Self { + let database = snapshot_repository.database_identity(); + let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); + let (snapshot_created, _) = watch::channel(latest_snapshot); + let (request_tx, request_rx) = mpsc::unbounded(); + + let actor = SnapshotWorkerActor { + snapshot_requests: request_rx, + snapshot_repo: snapshot_repository.clone(), + snapshot_created: snapshot_created.clone(), + metrics: SnapshotMetrics::new(database), + runtime: runtime.clone(), + compression: compression.is_enabled().then(|| Compressor { + snapshot_repo: snapshot_repository.clone(), + metrics: CompressionMetrics::new(database), + stats: <_>::default(), + runtime: runtime.clone(), + }), + }; + runtime.spawn(actor.run()); + + Self { + snapshot_created, + request_snapshot: request_tx, + snapshot_repo: snapshot_repository, + } + } +} + struct SnapshotMetrics { snapshot_timing_total: Histogram, snapshot_timing_inner: Histogram, @@ -171,9 +197,10 @@ enum Request { struct SnapshotWorkerActor { snapshot_requests: mpsc::UnboundedReceiver, - snapshot_repo: Arc, + snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, + runtime: RuntimeDispatch, compression: Option, } @@ -225,24 +252,23 @@ impl SnapshotWorkerActor { 
let inner_timer = self.metrics.snapshot_timing_inner.clone(); let snapshot_repo = self.snapshot_repo.clone(); + let runtime = self.runtime.clone(); let database_identity = self.snapshot_repo.database_identity(); - let maybe_snapshot = asyncify(move || { - let _timer = inner_timer.start_timer(); - Locking::take_snapshot_internal(&state, &snapshot_repo) - }) - .await - .with_context(|| format!("error capturing snapshot of database {}", database_identity))?; - let (snapshot_offset, unflushed_snapshot) = maybe_snapshot.with_context(|| { - format!( - "refusing to take snapshot of database {} at TX offset -1", - database_identity - ) - })?; - self.metrics - .snapshot_timing_fsync - .observe_closure_duration(|| unflushed_snapshot.sync_all())?; + let snapshot_offset = runtime + .spawn_blocking(move || { + let _timer = inner_timer.start_timer(); + Locking::take_snapshot_store_internal(&state, snapshot_repo.as_ref()) + }) + .await + .with_context(|| format!("error capturing snapshot of database {}", database_identity))? 
+ .with_context(|| { + format!( + "refusing to take snapshot of database {} at TX offset -1", + database_identity + ) + })?; let elapsed = Duration::from_secs_f64(timer.stop_and_record()); info!( @@ -315,6 +341,7 @@ struct Compressor { snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, + runtime: RuntimeDispatch, } impl Compressor { @@ -346,7 +373,8 @@ impl Compressor { let range = start..latest_snapshot; let mut stats = self.stats.take().unwrap_or_default(); - let (mut stats, res) = asyncify({ + let runtime = self.runtime.clone(); + let (mut stats, res) = runtime.spawn_blocking({ let range = range.clone(); move || { let _timer = inner_timer.start_timer(); diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index 7d9e1c8185e..f82d36286d4 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2060,7 +2060,6 @@ mod tests { Some(Persistence { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), - snapshot_store: None, snapshots: None, runtime: crate::runtime::RuntimeDispatch::tokio(rt), }), diff --git a/crates/dst/src/properties/runtime.rs b/crates/dst/src/properties/runtime.rs index 52e0140da37..c6f67c26e3f 100644 --- a/crates/dst/src/properties/runtime.rs +++ b/crates/dst/src/properties/runtime.rs @@ -412,9 +412,6 @@ where self.observe_table_observation(engine, table_interaction, table_observation) } (_, CommitlogObservation::DynamicMigrationProbe(probe)) => self.on_dynamic_migration_probe(engine, probe), - (CommitlogInteraction::TakeSnapshot, CommitlogObservation::Snapshot(snapshot)) => { - self.on_snapshot_capture(engine, snapshot) - } (_, CommitlogObservation::DurableReplay(replay)) => self.on_durable_replay(engine, replay), (_, CommitlogObservation::Applied | CommitlogObservation::Skipped) => Ok(()), (other, observation) => Err(format!( diff --git 
a/crates/dst/src/sim/snapshot.rs b/crates/dst/src/sim/snapshot.rs index 0d851c62259..f35dfc844eb 100644 --- a/crates/dst/src/sim/snapshot.rs +++ b/crates/dst/src/sim/snapshot.rs @@ -6,10 +6,10 @@ use std::sync::Arc; -use spacetimedb_core::db::relational_db::RelationalDB; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use spacetimedb_snapshot::{MemorySnapshotRepository, SnapshotStore}; +use spacetimedb_snapshot::{MemorySnapshotRepository, ReconstructedSnapshot, SnapshotError, SnapshotRepo}; +use spacetimedb_table::{blob_store::BlobStore, page_pool::PagePool, table::Table}; use crate::{ seed::DstSeed, @@ -27,7 +27,7 @@ pub(crate) fn is_injected_snapshot_error_text(text: &str) -> bool { } pub(crate) struct SnapshotRestoreRepo { - pub(crate) store: Option>, + pub(crate) store: Option>, pub(crate) restored_snapshot_offset: Option, pub(crate) latest_snapshot_offset: Option, } @@ -42,6 +42,7 @@ pub(crate) struct SnapshotRestoreRepo { /// This is the intended boundary for the current DST target. It exercises /// capture/restore behavior, retry classification, and replay correctness. It /// does not model torn snapshot pages or byte-level corruption. 
+#[derive(Clone)] pub(crate) struct BuggifiedSnapshotRepo { repo: Arc, faults: StorageFaultController, @@ -75,19 +76,6 @@ impl BuggifiedSnapshotRepo { }) } - pub(crate) fn capture_from(&self, db: &RelationalDB) -> Result, String> { - self.faults.maybe_latency(); - self.inject(StorageFaultKind::Open)?; - self.inject(StorageFaultKind::Metadata)?; - self.inject(StorageFaultKind::Write)?; - self.inject(StorageFaultKind::Fsync)?; - - let created = db - .take_snapshot_store(self.repo.as_ref()) - .map_err(|err| format!("snapshot capture failed: {err}"))?; - Ok(created) - } - pub(crate) fn repo_for_restore(&self, durable_offset: Option) -> Result { let latest_snapshot_offset = self.latest_snapshot_unfaulted()?; self.faults.maybe_latency(); @@ -125,6 +113,77 @@ impl BuggifiedSnapshotRepo { } } +impl SnapshotRepo for BuggifiedSnapshotRepo { + fn database_identity(&self) -> Identity { + self.repo.database_identity() + } + + fn capture_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::Open) + .map_err(SnapshotError::Io)?; + self.faults + .maybe_error(StorageFaultKind::Metadata) + .map_err(SnapshotError::Io)?; + self.faults + .maybe_error(StorageFaultKind::Write) + .map_err(SnapshotError::Io)?; + self.faults + .maybe_error(StorageFaultKind::Fsync) + .map_err(SnapshotError::Io)?; + self.repo.capture_snapshot(tables, blobs, tx_offset) + } + + fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result { + self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::Open) + .map_err(SnapshotError::Io)?; + self.faults + .maybe_error(StorageFaultKind::Read) + .map_err(SnapshotError::Io)?; + self.repo.read_snapshot(tx_offset, page_pool) + } + + fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError> { + self.faults.maybe_latency(); + self.faults + 
.maybe_error(StorageFaultKind::Metadata) + .map_err(SnapshotError::Io)?; + self.repo.latest_snapshot_older_than(upper_bound) + } + + fn latest_snapshot(&self) -> Result, SnapshotError> { + self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::Metadata) + .map_err(SnapshotError::Io)?; + self.repo.latest_snapshot() + } + + fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { + self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::Metadata) + .map_err(SnapshotError::Io)?; + self.repo.invalidate_newer_snapshots(upper_bound) + } + + fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::Metadata) + .map_err(SnapshotError::Io)?; + self.repo.invalidate_snapshot(tx_offset) + } +} + #[cfg(test)] mod tests { use crate::{config::CommitlogFaultProfile, seed::DstSeed}; diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 750f955e45d..3256cb1d0fe 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -4,7 +4,7 @@ use std::{cell::Cell, collections::BTreeMap, io, num::NonZeroU64, ops::Bound, sy use spacetimedb_commitlog::repo::{Memory as MemoryCommitlogRepo, SizeOnDisk}; use spacetimedb_core::{ - db::relational_db::{MutTx as RelMutTx, Persistence, RelationalDB, Tx as RelTx}, + db::relational_db::{MutTx as RelMutTx, Persistence, RelationalDB, SnapshotWorker, Tx as RelTx}, error::{DBError, DatastoreError, IndexError}, messages::control_db::HostType, }; @@ -45,7 +45,6 @@ use crate::{ workload::{ commitlog_ops::{ CommitlogInteraction, CommitlogWorkloadOutcome, DiskFaultSummary, DurableReplaySummary, - SnapshotCaptureStatus, SnapshotObservation, }, commitlog_ops::{InteractionSummary, RuntimeSummary, SchemaSummary, TableOperationSummary, TransactionSummary}, table_ops::{ 
@@ -164,7 +163,6 @@ impl RunStats { CommitlogInteraction::CreateDynamicTable { .. } => self.interactions.create_dynamic_table += 1, CommitlogInteraction::DropDynamicTable { .. } => self.interactions.drop_dynamic_table += 1, CommitlogInteraction::MigrateDynamicTable { .. } => self.interactions.migrate_dynamic_table += 1, - CommitlogInteraction::TakeSnapshot => self.interactions.snapshot_requested += 1, CommitlogInteraction::CloseReopen => self.interactions.close_reopen_requested += 1, } } @@ -182,16 +180,6 @@ impl RunStats { _ => {} } } - if matches!(interaction, CommitlogInteraction::TakeSnapshot) { - match observation { - CommitlogObservation::Snapshot(SnapshotObservation { - status: SnapshotCaptureStatus::Captured { .. }, - .. - }) => self.interactions.snapshot_created += 1, - CommitlogObservation::Snapshot(_) => self.interactions.snapshot_skipped += 1, - _ => {} - } - } } fn record_table_operation(&mut self, case: TableInteractionCase) { @@ -251,6 +239,7 @@ struct ReopenedRelationalDb { db: RelationalDB, restored_snapshot_offset: Option, latest_snapshot_offset: Option, + snapshot_worker: SnapshotWorker, } /// Engine executing mixed table+lifecycle interactions while recording mocked durable history. 
@@ -270,6 +259,7 @@ struct RelationalDbEngine { durability_opts: spacetimedb_durability::local::Options, commitlog_repo: StressCommitlogRepo, snapshot_repo: StressSnapshotRepo, + snapshot_worker: SnapshotWorker, stats: RunStats, } @@ -298,6 +288,7 @@ impl RelationalDbEngine { durability_opts: bootstrap.durability_opts, commitlog_repo: bootstrap.commitlog_repo, snapshot_repo: bootstrap.snapshot_repo, + snapshot_worker: bootstrap.snapshot_worker, stats: RunStats { runtime: RuntimeStats::default(), ..Default::default() @@ -375,7 +366,6 @@ impl RelationalDbEngine { CommitlogInteraction::CreateDynamicTable { conn, slot } => self.create_dynamic_table(*conn, *slot), CommitlogInteraction::DropDynamicTable { conn, slot } => self.drop_dynamic_table(*conn, *slot), CommitlogInteraction::MigrateDynamicTable { conn, slot } => self.migrate_dynamic_table(*conn, *slot), - CommitlogInteraction::TakeSnapshot => self.take_snapshot().await, CommitlogInteraction::CloseReopen => self.close_and_reopen().await, }?; if !matches!(interaction, CommitlogInteraction::CloseReopen) { @@ -411,6 +401,7 @@ impl RelationalDbEngine { self.db = Some(reopened.db); self.last_restored_snapshot_offset = reopened.restored_snapshot_offset; self.latest_snapshot_offset = reopened.latest_snapshot_offset; + self.snapshot_worker = reopened.snapshot_worker; self.rebuild_table_handles_after_reopen()?; self.last_observed_durable_offset = self.durability.durable_tx_offset().last_seen(); let replay = self.durable_replay_summary()?; @@ -422,46 +413,6 @@ impl RelationalDbEngine { Ok(CommitlogObservation::DurableReplay(replay)) } - async fn take_snapshot(&mut self) -> Result { - let latest_before = self.snapshot_repo.latest_snapshot_unfaulted()?; - if self.execution.active_writer.is_some() - || self.execution.tx_by_connection.iter().any(|tx| tx.is_some()) - || self.read_tx_by_connection.iter().any(|tx| tx.is_some()) - { - trace!("skip snapshot while transaction is open"); - return self.snapshot_observation(latest_before, 
SnapshotCaptureStatus::SkippedOpenTransaction); - } - - self.wait_for_requested_durability(true).await?; - match self.snapshot_repo.capture_from(self.db()?) { - Ok(Some(offset)) => { - debug!(offset, "captured DST snapshot"); - self.snapshot_observation(latest_before, SnapshotCaptureStatus::Captured { offset }) - } - Ok(None) => self.snapshot_observation(latest_before, SnapshotCaptureStatus::SkippedNoSnapshotCreated), - Err(err) if is_injected_snapshot_error_text(&err) => { - trace!(error = %err, "injected snapshot fault skipped snapshot capture"); - self.snapshot_observation(latest_before, SnapshotCaptureStatus::SkippedInjectedFault) - } - Err(err) => Err(err), - } - } - - fn snapshot_observation( - &mut self, - latest_before: Option, - status: SnapshotCaptureStatus, - ) -> Result { - let latest_after = self.snapshot_repo.latest_snapshot_unfaulted()?; - self.latest_snapshot_offset = latest_after; - Ok(CommitlogObservation::Snapshot(SnapshotObservation { - durable_offset: self.last_observed_durable_offset, - latest_before, - latest_after, - status, - })) - } - fn reopen_from_history_with_fault_retry(&self, context: &'static str) -> Result { match self.reopen_from_history() { Ok(reopened) => Ok(reopened), @@ -481,11 +432,14 @@ impl RelationalDbEngine { ); let durable_offset = durability.durable_tx_offset().last_seen(); let snapshot_restore = self.snapshot_repo.repo_for_restore(durable_offset)?; + let snapshot_worker = SnapshotWorker::new( + Arc::new(self.snapshot_repo.clone()), + spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), + ); let persistence = Persistence { durability: durability.clone(), disk_size: Arc::new(in_memory_size_on_disk), - snapshot_store: snapshot_restore.store, - snapshots: None, + snapshots: Some(snapshot_worker.clone()), runtime: spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), }; let (db, connected_clients) = RelationalDB::open( @@ -507,6 +461,7 @@ impl RelationalDbEngine { db, restored_snapshot_offset: 
snapshot_restore.restored_snapshot_offset, latest_snapshot_offset: snapshot_restore.latest_snapshot_offset, + snapshot_worker, }) } @@ -1511,6 +1466,7 @@ struct RelationalDbBootstrap { db: RelationalDB, commitlog_repo: StressCommitlogRepo, snapshot_repo: StressSnapshotRepo, + snapshot_worker: SnapshotWorker, durability: Arc, durability_opts: spacetimedb_durability::local::Options, } @@ -1531,11 +1487,14 @@ fn bootstrap_relational_db( InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), durability_opts) .map_err(|err| anyhow::anyhow!("open in-memory durability failed: {err}"))?, ); + let snapshot_worker = SnapshotWorker::new( + Arc::new(snapshot_repo.clone()), + spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), + ); let persistence = Persistence { durability: durability.clone(), disk_size: Arc::new(in_memory_size_on_disk), - snapshot_store: None, - snapshots: None, + snapshots: Some(snapshot_worker.clone()), runtime: spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), }; let (db, connected_clients) = RelationalDB::open( @@ -1554,6 +1513,7 @@ fn bootstrap_relational_db( db, commitlog_repo, snapshot_repo, + snapshot_worker, durability, durability_opts, }) diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs index 46d84121631..4e6a173c6a8 100644 --- a/crates/dst/src/workload/commitlog_ops/generation.rs +++ b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -106,10 +106,6 @@ impl CommitlogWorkloadSource { self.pending.push_back(CommitlogInteraction::CloseReopen); } - if Percent::new(self.profile.snapshot_pct).sample(&mut self.rng) { - self.pending.push_back(CommitlogInteraction::TakeSnapshot); - } - if Percent::new(self.profile.create_dynamic_table_pct).sample(&mut self.rng) { let conn = ConnectionChoice { connection_count: self.num_connections, diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs 
index 0c0591f102f..62711866eb4 100644 --- a/crates/dst/src/workload/commitlog_ops/types.rs +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -18,8 +18,6 @@ pub enum CommitlogInteraction { DropDynamicTable { conn: SessionId, slot: u32 }, /// Migrate dynamic table schema for a slot. MigrateDynamicTable { conn: SessionId, slot: u32 }, - /// Capture a durable snapshot of the current database state. - TakeSnapshot, /// Close and restart the database from durable history. CloseReopen, } diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 8721ddc89a2..7633ef08e40 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -67,6 +67,28 @@ impl RuntimeDispatch { } } + pub async fn spawn_blocking(&self, f: F) -> R + where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, + { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = &f; + match self { + #[cfg(feature = "tokio")] + Self::Tokio(_) => tokio::task::spawn_blocking(f) + .await + .unwrap_or_else(|e| match e.try_into_panic() { + Ok(panic_payload) => std::panic::resume_unwind(panic_payload), + Err(e) => panic!("Unexpected JoinError: {e}"), + }), + #[cfg(feature = "simulation")] + Self::Simulation(handle) => handle.spawn_on(sim::NodeId::MAIN, async move { f() }).await, + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + _ => unreachable!("runtime dispatch has no enabled backend"), + } + } + pub async fn timeout( &self, timeout_after: Duration, diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index ade599853ff..709973e376b 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -1342,7 +1342,7 @@ impl SnapshotRepository { /// [`MemorySnapshotRepository`] to keep snapshot storage inside the simulator /// boundary instead of depending on temporary directories or host filesystem /// behavior. 
-pub trait SnapshotStore: Send + Sync { +pub trait SnapshotRepo: Send + Sync { fn database_identity(&self) -> Identity; fn capture_snapshot<'db>( @@ -1365,7 +1365,7 @@ pub trait SnapshotStore: Send + Sync { fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError>; } -impl SnapshotStore for SnapshotRepository { +impl SnapshotRepo for SnapshotRepository { fn database_identity(&self) -> Identity { SnapshotRepository::database_identity(self) } @@ -1483,7 +1483,7 @@ impl MemorySnapshotRepository { } } -impl SnapshotStore for MemorySnapshotRepository { +impl SnapshotRepo for MemorySnapshotRepository { fn database_identity(&self) -> Identity { MemorySnapshotRepository::database_identity(self) } @@ -1518,6 +1518,8 @@ impl SnapshotStore for MemorySnapshotRepository { } } +pub use SnapshotRepo as SnapshotStore; + #[derive(Clone)] struct MemorySnapshot { database_identity: Identity, diff --git a/crates/standalone/Cargo.toml b/crates/standalone/Cargo.toml index 98295ef6a90..3bc7335625a 100644 --- a/crates/standalone/Cargo.toml +++ b/crates/standalone/Cargo.toml @@ -54,7 +54,7 @@ serde_json.workspace = true sled.workspace = true socket2.workspace = true thiserror.workspace = true -tokio = { package = "madsim-tokio", path = "../../../../madsim/madsim-tokio", features = ["full"] } +tokio = { workspace = true, features = ["full"] } tower-http.workspace = true toml.workspace = true tracing = { workspace = true, features = ["release_max_level_debug"] } From 692dfe2138e81f9c376d2d669d95e9cfb476edee Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 7 May 2026 16:17:56 +0530 Subject: [PATCH 33/74] cleanup --- crates/commitlog/src/lib.rs | 6 +- crates/core/src/db/relational_db.rs | 11 +- crates/core/src/util/jobs.rs | 27 - .../src/locking_tx_datastore/datastore.rs | 12 +- crates/dst/src/main.rs | 87 +- crates/dst/src/targets/descriptor.rs | 121 +-- crates/dst/src/targets/mod.rs | 1 + .../src/targets/relational_db_commitlog.rs | 70 +- 
.../src/targets/relational_db_concurrent.rs | 976 ++++++++++++++++++ .../dst/src/workload/table_ops/generation.rs | 45 +- crates/dst/src/workload/table_ops/model.rs | 4 +- .../table_ops/scenarios/random_crud.rs | 30 - 12 files changed, 1081 insertions(+), 309 deletions(-) create mode 100644 crates/dst/src/targets/relational_db_concurrent.rs diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index 26d37f97966..1444bdf2552 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -151,7 +151,11 @@ impl Options { } } -/// The canonical commitlog, backed by on-disk log files. +/// The canonical commitlog API over a repository backend `R`. +/// +/// The default backend is the on-disk filesystem repository +/// [`repo::Fs`], but tests and simulators may supply another [`Repo`] +/// implementation. /// /// Records in the log are of type `T`, which canonically is instantiated to /// [`payload::Txdata`]. diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 28f2e1a6c1f..5907a8ddf4c 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -41,7 +41,9 @@ use spacetimedb_lib::db::raw_def::v9::{btree, RawModuleDefV9Builder, RawSql}; use spacetimedb_lib::st_var::StVarValue; use spacetimedb_lib::ConnectionId; use spacetimedb_lib::Identity; -use spacetimedb_paths::server::{ReplicaDir, SnapshotDirPath, SnapshotsPath}; +use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; +#[cfg(test)] +use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_primitives::*; use spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::raw_identifier::RawIdentifier; @@ -237,11 +239,12 @@ impl RelationalDB { /// /// `None` may be passed to obtain an in-memory only database. /// - /// - snapshot storage + /// - snapshots /// - /// The [`SnapshotStore`] which stores snapshots of this database. 
+ /// Optional snapshot persistence and background snapshot execution, + /// carried through [`Persistence`]. /// This is only meaningful if `history` and `durability` are also supplied. - /// If restoring from an existing database, the snapshot store must + /// If restoring from an existing database, the snapshot repository must /// store views of the same sequence of TXes as the `history`. /// /// - `metrics_recorder_queue` diff --git a/crates/core/src/util/jobs.rs b/crates/core/src/util/jobs.rs index c85be43ff90..cf5e7997fdd 100644 --- a/crates/core/src/util/jobs.rs +++ b/crates/core/src/util/jobs.rs @@ -7,7 +7,6 @@ use futures::FutureExt; use indexmap::IndexMap; use smallvec::SmallVec; use spacetimedb_data_structures::map::HashMap; -#[cfg(not(simulation))] use tokio::runtime; use tokio::sync::{mpsc, oneshot, watch}; use tracing::Instrument; @@ -295,14 +294,10 @@ pub struct SingleCoreExecutor { struct SingleCoreExecutorInner { /// The sending end of a channel over which we send jobs. job_tx: mpsc::UnboundedSender LocalBoxFuture<'static, ()> + Send>>, - #[cfg(simulation)] - /// Retains the allocation guard for the lifetime of the simulated executor. - _guard: LoadBalanceOnDropGuard, } impl SingleCoreExecutor { /// Spawn a `SingleCoreExecutor` on the given core. - #[cfg(not(simulation))] fn spawn(core: AllocatedJobCore, name: Option) -> Self { let AllocatedJobCore { guard, mut pinner } = core; @@ -342,28 +337,6 @@ impl SingleCoreExecutor { Self { inner } } - /// Spawn a simulated `SingleCoreExecutor`. - /// - /// In simulation, job execution models the same logical single-core queue - /// without creating an OS thread or re-entering a Tokio runtime with - /// `Handle::block_on`. 
- #[cfg(simulation)] - fn spawn(core: AllocatedJobCore) -> Self { - let AllocatedJobCore { guard, pinner: _ } = core; - - let (job_tx, mut job_rx) = mpsc::unbounded_channel(); - - let inner = Arc::new(SingleCoreExecutorInner { job_tx, _guard: guard }); - - tokio::task::spawn_local(async move { - while let Some(job) = job_rx.recv().await { - tokio::task::spawn_local(job()); - } - }); - - Self { inner } - } - /// Create a `SingleCoreExecutor` which runs jobs in [`tokio::runtime::Handle::current`]. /// /// Callers should most likely instead construct a `SingleCoreExecutor` via [`JobCores::take`], diff --git a/crates/datastore/src/locking_tx_datastore/datastore.rs b/crates/datastore/src/locking_tx_datastore/datastore.rs index 13057e18a37..fd1da554c88 100644 --- a/crates/datastore/src/locking_tx_datastore/datastore.rs +++ b/crates/datastore/src/locking_tx_datastore/datastore.rs @@ -39,7 +39,7 @@ use spacetimedb_schema::{ reducer_name::ReducerName, schema::{ColumnSchema, IndexSchema, SequenceSchema, TableSchema}, }; -use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotRepository, SnapshotStore, UnflushedSnapshot}; +use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotRepo, SnapshotRepository, UnflushedSnapshot}; use spacetimedb_table::{ indexes::RowPointer, page_pool::PagePool, @@ -234,14 +234,6 @@ impl Locking { .map_err(Into::into) } - /// Take a snapshot through a repository abstraction. - /// - /// Unlike [`Self::take_snapshot`], this does not expose filesystem paths and - /// can therefore be backed by in-memory simulator storage. 
- pub fn take_snapshot_store(&self, store: &dyn SnapshotStore) -> Result> { - Self::take_snapshot_store_internal(&self.committed_state, store) - } - pub fn assert_system_tables_match(&self) -> Result<()> { let committed_state = self.committed_state.read_arc(); committed_state.assert_system_table_schemas_match() @@ -270,7 +262,7 @@ impl Locking { pub fn take_snapshot_store_internal( committed_state: &RwLock, - store: &dyn SnapshotStore, + store: &dyn SnapshotRepo, ) -> Result> { let mut committed_state = committed_state.write(); let Some(tx_offset) = committed_state.next_tx_offset.checked_sub(1) else { diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index 01d0961177c..53e368adb92 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -1,11 +1,10 @@ use std::time::{SystemTime, UNIX_EPOCH}; -use clap::{Args, Parser, Subcommand, ValueEnum}; +use clap::{Args, Parser, Subcommand}; use spacetimedb_dst::{ - config::{CommitlogFaultProfile, RunConfig}, + config::RunConfig, seed::DstSeed, - targets::descriptor::{RelationalDbCommitlogDescriptor, TargetDescriptor}, - workload::table_ops::TableScenarioId, + targets::descriptor::{RelationalDbConcurrentDescriptor, TargetDescriptor}, }; #[derive(Parser, Debug)] @@ -21,18 +20,8 @@ enum Command { Run(RunArgs), } -#[derive(Args, Debug, Clone)] -struct TargetArgs { - #[arg(long, value_enum, default_value_t = TargetKind::RelationalDbCommitlog)] - target: TargetKind, - #[arg(long, value_enum, default_value_t = ScenarioKind::RandomCrud)] - scenario: ScenarioKind, -} - #[derive(Args, Debug)] struct RunArgs { - #[command(flatten)] - target: TargetArgs, #[arg(long, help = "Seed for generated choices. Defaults to wall-clock time.")] seed: Option, #[arg( @@ -42,44 +31,6 @@ struct RunArgs { duration: Option, #[arg(long, help = "Deterministic interaction budget. 
Preferred for replayable failures.")] max_interactions: Option, - #[arg( - long, - value_enum, - default_value_t = CommitlogFaultProfileKind::Default, - help = "Commitlog disk-fault profile for commitlog-backed targets." - )] - commitlog_fault_profile: CommitlogFaultProfileKind, -} - -#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] -enum TargetKind { - RelationalDbCommitlog, -} - -#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] -enum ScenarioKind { - RandomCrud, - IndexedRanges, - Banking, -} - -#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] -enum CommitlogFaultProfileKind { - Off, - Light, - Default, - Aggressive, -} - -impl From for CommitlogFaultProfile { - fn from(profile: CommitlogFaultProfileKind) -> Self { - match profile { - CommitlogFaultProfileKind::Off => Self::Off, - CommitlogFaultProfileKind::Light => Self::Light, - CommitlogFaultProfileKind::Default => Self::Default, - CommitlogFaultProfileKind::Aggressive => Self::Aggressive, - } - } } fn main() -> anyhow::Result<()> { @@ -104,18 +55,9 @@ fn init_tracing() { fn run_command(args: RunArgs) -> anyhow::Result<()> { let seed = resolve_seed(args.seed); - let config = build_config( - args.duration.as_deref(), - args.max_interactions, - args.commitlog_fault_profile, - )?; + let config = build_config(args.duration.as_deref(), args.max_interactions)?; - match args.target.target { - TargetKind::RelationalDbCommitlog => { - let scenario = map_table_scenario(args.target.scenario)?; - run_prepared_target::(seed, scenario, config) - } - } + run_prepared_target::(seed, (), config) } fn run_prepared_target( @@ -136,14 +78,6 @@ where .unwrap_or_else(|payload| std::panic::resume_unwind(payload)) } -fn map_table_scenario(scenario: ScenarioKind) -> anyhow::Result { - match scenario { - ScenarioKind::RandomCrud => Ok(TableScenarioId::RandomCrud), - ScenarioKind::IndexedRanges => Ok(TableScenarioId::IndexedRanges), - ScenarioKind::Banking => Ok(TableScenarioId::Banking), - } -} - fn 
resolve_seed(seed: Option) -> DstSeed { seed.map(DstSeed).unwrap_or_else(|| { let nanos = SystemTime::now() @@ -154,12 +88,8 @@ fn resolve_seed(seed: Option) -> DstSeed { }) } -fn build_config( - duration: Option<&str>, - max_interactions: Option, - commitlog_fault_profile: CommitlogFaultProfileKind, -) -> anyhow::Result { - let config = match (duration, max_interactions) { +fn build_config(duration: Option<&str>, max_interactions: Option) -> anyhow::Result { + Ok(match (duration, max_interactions) { (Some(duration), Some(max_interactions)) => RunConfig { max_interactions: Some(max_interactions), max_duration_ms: Some(spacetimedb_dst::config::parse_duration_spec(duration)?.as_millis() as u64), @@ -168,8 +98,7 @@ fn build_config( (Some(duration), None) => RunConfig::with_duration_spec(duration)?, (None, Some(max_interactions)) => RunConfig::with_max_interactions(max_interactions), (None, None) => RunConfig::with_max_interactions(1_000), - }; - Ok(config.with_commitlog_fault_profile(commitlog_fault_profile.into())) + }) } #[allow(clippy::disallowed_macros)] diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index ec0ede37d93..5a387625536 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -2,7 +2,7 @@ use std::{future::Future, pin::Pin}; -use crate::{config::RunConfig, seed::DstSeed, workload::table_ops::TableScenarioId}; +use crate::{config::RunConfig, seed::DstSeed}; /// Descriptor contract: CLI talks to this, not per-target ad hoc handlers. 
pub trait TargetDescriptor { @@ -18,117 +18,44 @@ pub trait TargetDescriptor { pub type TargetRunFuture = Pin>>>; -pub struct RelationalDbCommitlogDescriptor; +pub struct RelationalDbConcurrentDescriptor; -impl TargetDescriptor for RelationalDbCommitlogDescriptor { - const NAME: &'static str = "relational_db_commitlog"; - type Scenario = TableScenarioId; +impl TargetDescriptor for RelationalDbConcurrentDescriptor { + const NAME: &'static str = "relational_db_concurrent"; + type Scenario = (); - fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture { + fn run_streaming(seed: DstSeed, _scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture { Box::pin(async move { - let outcome = - crate::targets::relational_db_commitlog::run_generated_with_config_and_scenario(seed, scenario, config) - .await?; - Ok(format_relational_db_commitlog_outcome(Self::NAME, seed, &outcome)) + let outcome = crate::targets::relational_db_concurrent::run_generated_with_config(seed, config).await?; + Ok(format_relational_db_concurrent_outcome(Self::NAME, seed, &outcome)) }) } } -fn format_relational_db_commitlog_outcome( +fn format_relational_db_concurrent_outcome( target: &str, seed: DstSeed, - outcome: &crate::targets::relational_db_commitlog::RelationalDbCommitlogOutcome, + outcome: &crate::targets::relational_db_concurrent::RelationalDbConcurrentOutcome, ) -> String { - let alive_tasks = outcome - .runtime - .runtime_alive_tasks - .map(|count| count.to_string()) - .unwrap_or_else(|| "unknown".to_string()); - format!( concat!( - "ok target={} seed={} steps={}\n", + "ok target={} seed={} rounds={}\n", "\n", - "schema: tables={} columns={} max_columns={} indexes={} extra_indexes={}\n", - "durability: durable_commits={} replay_tables={} restored_snapshot={:?} latest_snapshot={:?}\n", - "interactions: table={} creates={} drops={} migrates={} snapshots={} snapshot_created={} snapshot_skipped={} reopens={} reopen_skipped={} skipped={}\n", - 
"table_ops:\n", - " tx_control: begin={} commit={} rollback={} begin_read={} release_read={} begin_conflict={} write_conflict={}\n", - " writes: insert={} delete={} exact_dup={} unique_conflict={} missing_delete={} batch_insert={} batch_delete={} reinsert={}\n", - " schema: add_column={} add_index={}\n", - " reads: point_lookup={} predicate_count={} range_scan={} full_scan={}\n", - "transactions: begin={} commit={} rollback={} auto_commit={} read_tx={}\n", - "disk_faults: profile={} latency={} short_read={} short_write={} errors(read={} write={} flush={} fsync={} open={} metadata={})\n", - "snapshot_faults: profile={} latency={} errors(read={} write={} fsync={} open={} metadata={})\n", - "runtime: known_tasks={} durability_actors={} alive_tasks={}" + "clients={} events={} reads={}\n", + "transactions: committed={} write_conflicts={} writer_conflicts={} reader_conflicts={}\n", + "rows: final={} expected={}" ), target, seed.0, - outcome.applied_steps, - outcome.schema.initial_tables, - outcome.schema.initial_columns, - outcome.schema.max_columns_per_table, - outcome.schema.initial_indexes, - outcome.schema.extra_indexes, - outcome.durable_commit_count, - outcome.replay_table_count, - outcome.replay.restored_snapshot_offset, - outcome.replay.latest_snapshot_offset, - outcome.interactions.table, - outcome.interactions.create_dynamic_table, - outcome.interactions.drop_dynamic_table, - outcome.interactions.migrate_dynamic_table, - outcome.interactions.snapshot_requested, - outcome.interactions.snapshot_created, - outcome.interactions.snapshot_skipped, - outcome.interactions.close_reopen_applied, - outcome.interactions.close_reopen_skipped, - outcome.interactions.skipped, - outcome.table_ops.begin_tx, - outcome.table_ops.commit_tx, - outcome.table_ops.rollback_tx, - outcome.table_ops.begin_read_tx, - outcome.table_ops.release_read_tx, - outcome.table_ops.begin_tx_conflict, - outcome.table_ops.write_conflict_insert, - outcome.table_ops.insert, - outcome.table_ops.delete, - 
outcome.table_ops.exact_duplicate_insert, - outcome.table_ops.unique_key_conflict_insert, - outcome.table_ops.delete_missing, - outcome.table_ops.batch_insert, - outcome.table_ops.batch_delete, - outcome.table_ops.reinsert, - outcome.table_ops.add_column, - outcome.table_ops.add_index, - outcome.table_ops.point_lookup, - outcome.table_ops.predicate_count, - outcome.table_ops.range_scan, - outcome.table_ops.full_scan, - outcome.transactions.explicit_begin, - outcome.transactions.explicit_commit, - outcome.transactions.explicit_rollback, - outcome.transactions.auto_commit, - outcome.transactions.read_tx, - outcome.disk_faults.profile, - outcome.disk_faults.latency, - outcome.disk_faults.short_read, - outcome.disk_faults.short_write, - outcome.disk_faults.read_error, - outcome.disk_faults.write_error, - outcome.disk_faults.flush_error, - outcome.disk_faults.fsync_error, - outcome.disk_faults.open_error, - outcome.disk_faults.metadata_error, - outcome.snapshot_faults.profile, - outcome.snapshot_faults.latency, - outcome.snapshot_faults.read_error, - outcome.snapshot_faults.write_error, - outcome.snapshot_faults.fsync_error, - outcome.snapshot_faults.open_error, - outcome.snapshot_faults.metadata_error, - outcome.runtime.known_runtime_tasks_scheduled, - outcome.runtime.durability_actors_started, - alive_tasks + outcome.rounds, + outcome.clients, + outcome.events, + outcome.reads, + outcome.committed, + outcome.write_conflicts, + outcome.writer_conflicts, + outcome.reader_conflicts, + outcome.final_rows.len(), + outcome.expected_rows.len(), ) } diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index 51a483d73a2..fba30fb371f 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -2,3 +2,4 @@ pub mod descriptor; pub mod relational_db_commitlog; +pub mod relational_db_concurrent; diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 3256cb1d0fe..5487befc8ef 
100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -668,17 +668,32 @@ impl RelationalDbEngine { if self.execution.tx_by_connection[conn.as_index()].is_some() { return Err(format!("connection {conn} already has open transaction")); } - if let Some(owner) = self.execution.active_writer { - self.expect_write_lock_contended(conn, owner, "begin write transaction")?; - return Ok(TableObservation::ObservedError(TableErrorKind::WriteConflict)); + match self + .db()? + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + { + Some(tx) => { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + let _ = self.db()?.rollback_mut_tx(tx); + return Err(format!( + "connection {conn} unexpectedly acquired write lock while conflicting transaction was open" + )); + } + self.execution.tx_by_connection[conn.as_index()] = Some(tx); + self.execution.active_writer = Some(conn); + self.stats.transactions.explicit_begin += 1; + Ok(TableObservation::Applied) + } + None => { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + Ok(TableObservation::ObservedError(TableErrorKind::WriteConflict)) + } else { + Err(format!( + "connection {conn} failed to begin write transaction without an open conflicting lock" + )) + } + } } - self.execution.tx_by_connection[conn.as_index()] = Some( - self.db()? - .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests), - ); - self.execution.active_writer = Some(conn); - self.stats.transactions.explicit_begin += 1; - Ok(TableObservation::Applied) } fn execute_insert_rows( @@ -771,14 +786,14 @@ impl RelationalDbEngine { return result; } - if let Some(owner) = self.execution.active_writer { - self.expect_write_lock_contended(conn, owner, "auto-commit write")?; + if self.execution.active_writer.is_some() || self.any_open_read_tx() { return Ok(Err(TableErrorKind::WriteConflict)); } let mut tx = self .db()? 
- .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .ok_or_else(|| format!("connection {conn} failed to acquire write transaction"))?; self.execution.active_writer = Some(conn); let value = match f(self, &mut tx) { Ok(Ok(value)) => value, @@ -824,16 +839,16 @@ impl RelationalDbEngine { return result; } - if let Some(owner) = self.execution.active_writer { - self.expect_write_lock_contended(conn, owner, "auto-commit write")?; + if self.execution.active_writer.is_some() || self.any_open_read_tx() { return Err(format!( - "connection {conn} cannot auto-commit write while connection {owner} owns lock" + "connection {conn} cannot auto-commit write while a conflicting lock is open" )); } let mut tx = self .db()? - .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .ok_or_else(|| format!("connection {conn} failed to acquire write transaction"))?; self.execution.active_writer = Some(conn); let value = match f(self, &mut tx) { Ok(value) => value, @@ -856,17 +871,6 @@ impl RelationalDbEngine { Ok(value) } - fn expect_write_lock_contended(&self, contender: SessionId, owner: SessionId, action: &str) -> Result<(), String> { - let db = self.db()?; - if let Some(tx) = db.try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) { - let _ = db.rollback_mut_tx(tx); - return Err(format!( - "expected write lock contention for connection {contender} during {action} while connection {owner} owns lock, but datastore accepted a second writer" - )); - } - Ok(()) - } - fn try_insert_base_row( &self, tx: &mut RelMutTx, @@ -908,7 +912,7 @@ impl RelationalDbEngine { } fn create_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { - if self.execution.active_writer.is_some() { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { trace!( step = self.step, slot, @@ -952,7 +956,7 @@ impl 
RelationalDbEngine { } fn drop_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { - if self.execution.active_writer.is_some() { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { trace!( step = self.step, slot, @@ -979,7 +983,7 @@ impl RelationalDbEngine { } fn migrate_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { - if self.execution.active_writer.is_some() { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { trace!( step = self.step, slot, @@ -1043,6 +1047,10 @@ impl RelationalDbEngine { self.execution.active_writer.unwrap_or(conn) } + fn any_open_read_tx(&self) -> bool { + self.read_tx_by_connection.iter().any(Option::is_some) + } + fn refresh_observed_durable_offset(&mut self, forced: bool) -> Result<(), String> { let durable_offset = self.durability.durable_tx_offset().last_seen(); if forced || durable_offset != self.last_observed_durable_offset { diff --git a/crates/dst/src/targets/relational_db_concurrent.rs b/crates/dst/src/targets/relational_db_concurrent.rs new file mode 100644 index 00000000000..233e8dd5300 --- /dev/null +++ b/crates/dst/src/targets/relational_db_concurrent.rs @@ -0,0 +1,976 @@ +//! Concurrent RelationalDB API target. +//! +//! The target models concurrency at RelationalDB lock boundaries. A generated +//! round may hold one or more read transactions, or one write transaction, and +//! then probe whether another client can acquire the write lock. Once a client +//! owns a `Tx` or `MutTx`, that section is synchronous: no simulator yield or +//! async boundary is allowed until the transaction is released, committed, or +//! rolled back. 
+ +use std::{collections::BTreeMap, fmt}; + +use spacetimedb_core::{ + db::relational_db::{MutTx as RelMutTx, RelationalDB, Tx as RelTx}, + error::DBError, + messages::control_db::HostType, +}; +use spacetimedb_datastore::{execution_context::Workload, traits::IsolationLevel}; +use spacetimedb_durability::EmptyHistory; +use spacetimedb_lib::{ + db::auth::{StAccess, StTableType}, + Identity, +}; +use spacetimedb_primitives::TableId; +use spacetimedb_sats::AlgebraicValue; +use spacetimedb_schema::{ + def::BTreeAlgorithm, + schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, + table_name::TableName, +}; +use spacetimedb_table::page_pool::PagePool; +use tracing::info; + +use crate::{ + client::SessionId, + config::RunConfig, + core::{self, StreamingProperties, TargetEngine, WorkloadSource}, + schema::SimRow, + seed::{DstRng, DstSeed}, +}; + +pub async fn run_generated_with_config( + seed: DstSeed, + config: RunConfig, +) -> anyhow::Result { + let source = ConcurrentWorkloadSource::new(seed, config.max_interactions_or_default(usize::MAX)); + let engine = ConcurrentRelationalDbEngine::new()?; + let outcome = core::run_streaming(source, engine, ConcurrentProperties, config).await?; + info!( + rounds = outcome.rounds, + committed = outcome.committed, + conflicts = outcome.write_conflicts, + "relational_db_concurrent complete" + ); + Ok(outcome) +} + +#[derive(Clone, Debug)] +struct RoundPlan { + id: u64, + kind: RoundKind, + shared: SimRow, + extra: SimRow, +} + +#[derive(Clone, Copy, Debug)] +enum RoundKind { + WriterBlocksWriter, + ReadersBlockWriter, + MultiReaderSnapshot, + MixedReadWrite, +} + +struct ConcurrentWorkloadSource { + rng: DstRng, + emitted: usize, + target: usize, + next_id: u64, +} + +impl ConcurrentWorkloadSource { + fn new(seed: DstSeed, target: usize) -> Self { + Self { + rng: seed.fork(910).rng(), + emitted: 0, + target, + next_id: seed.fork(911).0.max(1), + } + } + + fn make_row(&mut self) -> SimRow { + let id = self.next_id; + 
self.next_id = self.next_id.wrapping_add(1).max(1); + SimRow { + values: vec![ + AlgebraicValue::U64(id), + AlgebraicValue::U64(self.rng.next_u64() % 1_000), + ], + } + } + + fn make_round(&mut self, id: u64) -> RoundPlan { + RoundPlan { + id, + kind: match id % 4 { + 0 => RoundKind::WriterBlocksWriter, + 1 => RoundKind::ReadersBlockWriter, + 2 => RoundKind::MultiReaderSnapshot, + _ => RoundKind::MixedReadWrite, + }, + shared: self.make_row(), + extra: self.make_row(), + } + } +} + +impl WorkloadSource for ConcurrentWorkloadSource { + type Interaction = RoundPlan; + + fn next_interaction(&mut self) -> Option { + if self.emitted >= self.target { + return None; + } + let round = self.make_round(self.emitted as u64); + self.emitted += 1; + Some(round) + } + + fn request_finish(&mut self) { + self.target = self.emitted; + } +} + +struct ConcurrentRelationalDbEngine { + db: RelationalDB, + table_id: TableId, + events: Vec, +} + +impl ConcurrentRelationalDbEngine { + fn new() -> anyhow::Result { + let (db, connected_clients) = RelationalDB::open( + Identity::ZERO, + Identity::ZERO, + EmptyHistory::new(), + None, + None, + PagePool::new_for_test(), + )?; + assert_eq!(connected_clients.len(), 0); + db.with_auto_commit(Workload::Internal, |tx| { + db.set_initialized(tx, spacetimedb_datastore::traits::Program::empty(HostType::Wasm.into())) + })?; + + let table_id = install_concurrent_schema(&db)?; + Ok(Self { + db, + table_id, + events: Vec::new(), + }) + } + + fn execute_round(&mut self, round: &RoundPlan) -> Result { + let mut machine = RoundMachine::new(&self.db, self.table_id, round.id, 4); + let events = machine.run(round)?; + self.events.extend(events.clone()); + Ok(RoundObservation { + round: round.id, + events, + }) + } + + fn collect_rows(&self) -> Result, String> { + let tx = self.db.begin_tx(Workload::ForTests); + let result = collect_rows_in_tx(&self.db, self.table_id, &tx, "collect rows"); + let _ = self.db.release_tx(tx); + result + } +} + +impl TargetEngine 
for ConcurrentRelationalDbEngine { + type Observation = RoundObservation; + type Outcome = RelationalDbConcurrentOutcome; + type Error = String; + + fn execute_interaction<'a>( + &'a mut self, + interaction: &'a RoundPlan, + ) -> impl Future> + 'a { + async move { self.execute_round(interaction) } + } + + fn finish(&mut self) {} + + fn collect_outcome<'a>(&'a mut self) -> impl Future> + 'a { + async move { + let final_rows = self.collect_rows().map_err(anyhow::Error::msg)?; + let expected_rows = expected_rows_from_events(&self.events); + let summary = ConcurrentSummary::from_events(&self.events); + Ok(RelationalDbConcurrentOutcome { + rounds: summary.rounds, + clients: summary.clients, + events: summary.events, + reads: summary.reads, + committed: summary.committed, + write_conflicts: summary.write_conflicts, + writer_conflicts: summary.writer_conflicts, + reader_conflicts: summary.reader_conflicts, + final_rows, + expected_rows, + }) + } + } +} + +struct RoundMachine<'a> { + db: &'a RelationalDB, + table_id: TableId, + round: u64, + clients: Vec, + events: Vec, +} + +impl<'a> RoundMachine<'a> { + fn new(db: &'a RelationalDB, table_id: TableId, round: u64, clients: usize) -> Self { + Self { + db, + table_id, + round, + clients: (0..clients).map(|_| ClientState::Idle).collect(), + events: Vec::new(), + } + } + + fn run(&mut self, round: &RoundPlan) -> Result, String> { + let result = match round.kind { + RoundKind::WriterBlocksWriter => self.writer_blocks_writer(round), + RoundKind::ReadersBlockWriter => self.readers_block_writer(round), + RoundKind::MultiReaderSnapshot => self.multi_reader_snapshot(round), + RoundKind::MixedReadWrite => self.mixed_read_write(round), + }; + let cleanup = self.cleanup(); + result.and(cleanup)?; + Ok(std::mem::take(&mut self.events)) + } + + fn writer_blocks_writer(&mut self, round: &RoundPlan) -> Result<(), String> { + self.begin_write(client(0))?; + self.insert(client(0), round.shared.clone())?; + 
self.expect_write_conflict(client(1), ConflictReason::WriterHeld)?; + self.commit(client(0))?; + + self.begin_write(client(1))?; + self.insert(client(1), round.extra.clone())?; + self.commit(client(1)) + } + + fn readers_block_writer(&mut self, round: &RoundPlan) -> Result<(), String> { + self.begin_read(client(0))?; + self.begin_read(client(1))?; + self.full_scan(client(0))?; + self.full_scan(client(1))?; + self.expect_write_conflict(client(2), ConflictReason::ReadersHeld)?; + self.release_read(client(0))?; + self.release_read(client(1))?; + + self.begin_write(client(2))?; + self.insert(client(2), round.shared.clone())?; + self.commit(client(2)) + } + + fn multi_reader_snapshot(&mut self, round: &RoundPlan) -> Result<(), String> { + self.begin_read(client(0))?; + self.begin_read(client(1))?; + let rows_0 = self.full_scan(client(0))?; + let rows_1 = self.full_scan(client(1))?; + if rows_0 != rows_1 { + return Err(format!( + "[ConcurrentRelationalDb] round={} readers observed different snapshots: left={rows_0:?} right={rows_1:?}", + self.round + )); + } + self.release_read(client(0))?; + self.release_read(client(1))?; + + self.begin_write(client(2))?; + self.insert(client(2), round.shared.clone())?; + self.commit(client(2))?; + + self.begin_read(client(3))?; + self.point_lookup(client(3), round.shared.id().ok_or("generated row missing id")?)?; + self.release_read(client(3)) + } + + fn mixed_read_write(&mut self, round: &RoundPlan) -> Result<(), String> { + self.begin_write(client(0))?; + self.insert(client(0), round.shared.clone())?; + self.commit(client(0))?; + + self.begin_read(client(1))?; + self.point_lookup(client(1), round.shared.id().ok_or("generated row missing id")?)?; + self.release_read(client(1))?; + + self.begin_write(client(2))?; + self.delete(client(2), round.shared.clone())?; + self.rollback(client(2)); + + self.begin_write(client(3))?; + self.insert(client(3), round.extra.clone())?; + self.commit(client(3)) + } + + fn begin_read(&mut self, client: 
SessionId) -> Result<(), String> { + if self.any_writer() { + return Err(format!( + "[ConcurrentRelationalDb] round={} client={} would block beginning read while writer is held", + self.round, client + )); + } + self.expect_idle(client, "begin_read")?; + self.record_action(client, "begin_read"); + let tx = self.db.begin_tx(Workload::ForTests); + self.replace(client, ClientState::Reading { tx }); + Ok(()) + } + + fn release_read(&mut self, client: SessionId) -> Result<(), String> { + self.record_action(client, "release_read"); + match self.take(client)? { + ClientState::Reading { tx } => { + let _ = self.db.release_tx(tx); + self.replace(client, ClientState::Idle); + Ok(()) + } + state => { + self.replace(client, state); + Err(self.invalid_state(client, "release_read")) + } + } + } + + fn begin_write(&mut self, client: SessionId) -> Result<(), String> { + if self.try_begin_write(client)? { + Ok(()) + } else { + Err(format!( + "[ConcurrentRelationalDb] round={} client={} expected write lock to be available", + self.round, client + )) + } + } + + fn expect_write_conflict(&mut self, client: SessionId, reason: ConflictReason) -> Result<(), String> { + if self.try_begin_write(client)? { + self.rollback(client); + return Err(format!( + "[ConcurrentRelationalDb] round={} client={} unexpectedly acquired write lock", + self.round, client + )); + } + match self.events.last() { + Some(RoundEvent::WriteConflict { reason: observed, .. 
}) if *observed == reason => Ok(()), + Some(event) => Err(format!( + "[ConcurrentRelationalDb] round={} expected conflict reason {reason:?}, observed {event}", + self.round + )), + None => Err(format!( + "[ConcurrentRelationalDb] round={} expected write conflict event", + self.round + )), + } + } + + fn try_begin_write(&mut self, client: SessionId) -> Result { + self.expect_idle(client, "try_begin_write")?; + self.record_action(client, "try_begin_write"); + match self + .db + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + { + Some(tx) => { + self.replace( + client, + ClientState::Writing { + tx, + pending: Vec::new(), + }, + ); + self.events.push(RoundEvent::WriteLockAcquired { + round: self.round, + client, + }); + Ok(true) + } + None => { + self.events.push(RoundEvent::WriteConflict { + round: self.round, + client, + reason: self.conflict_reason(), + }); + Ok(false) + } + } + } + + fn insert(&mut self, client: SessionId, row: SimRow) -> Result<(), String> { + self.record_action(client, "insert"); + let table_id = self.table_id; + let db = self.db; + self.with_writer(client, |tx, pending| { + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + match db.insert(tx, table_id, &bsatn) { + Ok((_, row_ref, _)) => { + pending.push(ConcurrentMutation::Inserted(SimRow::from_product_value( + row_ref.to_product_value(), + ))); + Ok(()) + } + Err(err) if is_unique_constraint_violation(&err) => Ok(()), + Err(err) => Err(format!("insert failed: {err}")), + } + }) + } + + fn delete(&mut self, client: SessionId, row: SimRow) -> Result<(), String> { + self.record_action(client, "delete"); + let table_id = self.table_id; + let db = self.db; + self.with_writer(client, |tx, pending| { + match db.delete_by_rel(tx, table_id, [row.to_product_value()]) { + 0 => Ok(()), + 1 => { + pending.push(ConcurrentMutation::Deleted(row)); + Ok(()) + } + deleted => Err(format!("delete affected {deleted} rows")), + } + }) + } + + fn commit(&mut self, client: SessionId) 
-> Result<(), String> { + self.record_action(client, "commit"); + match self.take(client)? { + ClientState::Writing { tx, mut pending } => { + let committed = self + .db + .commit_tx(tx) + .map_err(|err| format!("commit failed: {err}"))? + .ok_or_else(|| "commit returned no tx data".to_string())?; + self.events.push(RoundEvent::Committed { + round: self.round, + client, + tx_offset: committed.0, + mutations: std::mem::take(&mut pending), + }); + self.replace(client, ClientState::Idle); + Ok(()) + } + state => { + self.replace(client, state); + Err(self.invalid_state(client, "commit")) + } + } + } + + fn rollback(&mut self, client: SessionId) { + self.record_action(client, "rollback"); + match self.take(client) { + Ok(ClientState::Writing { tx, .. }) => { + let _ = self.db.rollback_mut_tx(tx); + self.events.push(RoundEvent::RolledBack { + round: self.round, + client, + }); + self.replace(client, ClientState::Idle); + } + Ok(state) => self.replace(client, state), + Err(_) => {} + } + } + + fn full_scan(&mut self, client: SessionId) -> Result, String> { + self.record_action(client, "full_scan"); + let rows = self.with_reader(client, |tx| collect_rows_in_tx(self.db, self.table_id, tx, "full scan"))?; + self.events.push(RoundEvent::Read { + round: self.round, + client, + kind: ReadKind::FullScan, + rows: rows.clone(), + }); + Ok(rows) + } + + fn point_lookup(&mut self, client: SessionId, id: u64) -> Result, String> { + self.record_action(client, "point_lookup"); + let rows = self + .with_reader(client, |tx| { + collect_rows_in_tx(self.db, self.table_id, tx, "point lookup") + })? + .into_iter() + .filter(|row| row.id() == Some(id)) + .collect::>(); + self.events.push(RoundEvent::Read { + round: self.round, + client, + kind: ReadKind::PointLookup { id }, + rows: rows.clone(), + }); + Ok(rows) + } + + fn with_writer( + &mut self, + client: SessionId, + f: impl FnOnce(&mut RelMutTx, &mut Vec) -> Result, + ) -> Result { + match self.state_mut(client)? 
{ + ClientState::Writing { tx, pending } => f(tx, pending), + _ => Err(self.invalid_state(client, "write operation")), + } + } + + fn with_reader(&self, client: SessionId, f: impl FnOnce(&RelTx) -> Result) -> Result { + match self.state(client)? { + ClientState::Reading { tx } => f(tx), + _ => Err(self.invalid_state(client, "read operation")), + } + } + + fn cleanup(&mut self) -> Result<(), String> { + let mut leaked = None; + for index in 0..self.clients.len() { + let client = SessionId::from_index(index); + match self.take(client)? { + ClientState::Idle => self.replace(client, ClientState::Idle), + ClientState::Reading { tx } => { + let _ = self.db.release_tx(tx); + self.replace(client, ClientState::Idle); + leaked.get_or_insert_with(|| { + format!( + "[ConcurrentRelationalDb] round={} client={} leaked read transaction", + self.round, client + ) + }); + } + ClientState::Writing { tx, .. } => { + let _ = self.db.rollback_mut_tx(tx); + self.replace(client, ClientState::Idle); + leaked.get_or_insert_with(|| { + format!( + "[ConcurrentRelationalDb] round={} client={} leaked write transaction", + self.round, client + ) + }); + } + } + } + match leaked { + Some(err) => Err(err), + None => Ok(()), + } + } + + fn conflict_reason(&self) -> ConflictReason { + if self.any_writer() { + ConflictReason::WriterHeld + } else if self.any_reader() { + ConflictReason::ReadersHeld + } else { + ConflictReason::Unknown + } + } + + fn any_reader(&self) -> bool { + self.clients.iter().any(ClientState::is_reading) + } + + fn any_writer(&self) -> bool { + self.clients.iter().any(ClientState::is_writing) + } + + fn expect_idle(&self, client: SessionId, action: &'static str) -> Result<(), String> { + if self.state(client)?.is_idle() { + Ok(()) + } else { + Err(self.invalid_state(client, action)) + } + } + + fn record_action(&mut self, client: SessionId, name: &'static str) { + self.events.push(RoundEvent::Action { + round: self.round, + client, + name, + }); + } + + fn state(&self, client: 
SessionId) -> Result<&ClientState, String> { + self.clients + .get(client.as_index()) + .ok_or_else(|| format!("[ConcurrentRelationalDb] unknown client {client}")) + } + + fn state_mut(&mut self, client: SessionId) -> Result<&mut ClientState, String> { + self.clients + .get_mut(client.as_index()) + .ok_or_else(|| format!("[ConcurrentRelationalDb] unknown client {client}")) + } + + fn take(&mut self, client: SessionId) -> Result { + let state = self.state_mut(client)?; + Ok(std::mem::replace(state, ClientState::Idle)) + } + + fn replace(&mut self, client: SessionId, state: ClientState) { + self.clients[client.as_index()] = state; + } + + fn invalid_state(&self, client: SessionId, action: &str) -> String { + format!( + "[ConcurrentRelationalDb] round={} client={} cannot {action} from {}", + self.round, + client, + self.state(client).map(ClientState::name).unwrap_or("unknown") + ) + } +} + +enum ClientState { + Idle, + Reading { + tx: RelTx, + }, + Writing { + tx: RelMutTx, + pending: Vec, + }, +} + +impl ClientState { + fn name(&self) -> &'static str { + match self { + Self::Idle => "idle", + Self::Reading { .. } => "reading", + Self::Writing { .. } => "writing", + } + } + + fn is_idle(&self) -> bool { + matches!(self, Self::Idle) + } + + fn is_reading(&self) -> bool { + matches!(self, Self::Reading { .. }) + } + + fn is_writing(&self) -> bool { + matches!(self, Self::Writing { .. 
}) + } +} + +#[derive(Clone, Debug)] +struct RoundObservation { + round: u64, + events: Vec, +} + +#[derive(Clone, Debug)] +pub struct RelationalDbConcurrentOutcome { + pub rounds: usize, + pub clients: usize, + pub events: usize, + pub reads: usize, + pub committed: usize, + pub write_conflicts: usize, + pub writer_conflicts: usize, + pub reader_conflicts: usize, + pub final_rows: Vec, + pub expected_rows: Vec, +} + +#[derive(Clone, Debug)] +enum RoundEvent { + Action { + round: u64, + client: SessionId, + name: &'static str, + }, + WriteLockAcquired { + round: u64, + client: SessionId, + }, + WriteConflict { + round: u64, + client: SessionId, + reason: ConflictReason, + }, + Committed { + round: u64, + client: SessionId, + tx_offset: u64, + mutations: Vec, + }, + RolledBack { + round: u64, + client: SessionId, + }, + Read { + round: u64, + client: SessionId, + kind: ReadKind, + rows: Vec, + }, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum ConflictReason { + WriterHeld, + ReadersHeld, + Unknown, +} + +#[derive(Clone, Debug)] +enum ReadKind { + FullScan, + PointLookup { id: u64 }, +} + +#[derive(Clone, Debug)] +enum ConcurrentMutation { + Inserted(SimRow), + Deleted(SimRow), +} + +#[derive(Default)] +struct ConcurrentSummary { + rounds: usize, + clients: usize, + events: usize, + reads: usize, + committed: usize, + write_conflicts: usize, + writer_conflicts: usize, + reader_conflicts: usize, +} + +impl ConcurrentSummary { + fn from_events(events: &[RoundEvent]) -> Self { + let mut summary = Self::default(); + let mut max_round = None; + let mut max_client = None; + + for event in events { + summary.events += 1; + let (round, client) = event.position(); + max_round = Some(max_round.unwrap_or(round).max(round)); + max_client = Some(max_client.unwrap_or(client.as_index()).max(client.as_index())); + + match event { + RoundEvent::WriteConflict { reason, .. 
} => { + summary.write_conflicts += 1; + match reason { + ConflictReason::WriterHeld => summary.writer_conflicts += 1, + ConflictReason::ReadersHeld => summary.reader_conflicts += 1, + ConflictReason::Unknown => {} + } + } + RoundEvent::Committed { .. } => summary.committed += 1, + RoundEvent::Read { .. } => summary.reads += 1, + RoundEvent::Action { .. } | RoundEvent::WriteLockAcquired { .. } | RoundEvent::RolledBack { .. } => {} + } + } + + summary.rounds = max_round.map(|round| round as usize + 1).unwrap_or_default(); + summary.clients = max_client.map(|client| client + 1).unwrap_or_default(); + summary + } +} + +impl RoundEvent { + fn position(&self) -> (u64, SessionId) { + match self { + Self::Action { round, client, .. } + | Self::WriteLockAcquired { round, client } + | Self::WriteConflict { round, client, .. } + | Self::Committed { round, client, .. } + | Self::RolledBack { round, client } + | Self::Read { round, client, .. } => (*round, *client), + } + } +} + +struct ConcurrentProperties; + +impl StreamingProperties for ConcurrentProperties { + fn observe( + &mut self, + _engine: &ConcurrentRelationalDbEngine, + _interaction: &RoundPlan, + observation: &RoundObservation, + ) -> Result<(), String> { + if observation.events.is_empty() { + return Err(format!( + "[ConcurrentRelationalDb] round={} produced no events", + observation.round + )); + } + + for event in &observation.events { + if let RoundEvent::Read { + kind: ReadKind::PointLookup { id }, + rows, + .. 
+ } = event + { + if rows.len() > 1 || rows.iter().any(|row| row.id() != Some(*id)) { + return Err(format!( + "[ConcurrentRelationalDb] round={} invalid point lookup id={id}: {rows:?}", + observation.round + )); + } + } + } + Ok(()) + } + + fn finish( + &mut self, + _engine: &ConcurrentRelationalDbEngine, + outcome: &RelationalDbConcurrentOutcome, + ) -> Result<(), String> { + if outcome.final_rows != outcome.expected_rows { + return Err(format!( + "[ConcurrentRelationalDb] final rows differ from commit-offset oracle: expected={:?} actual={:?}", + outcome.expected_rows, outcome.final_rows + )); + } + if outcome.writer_conflicts == 0 { + return Err("[ConcurrentRelationalDb] no writer-held lock contention was observed".to_string()); + } + if outcome.reader_conflicts == 0 { + return Err("[ConcurrentRelationalDb] no reader-held lock contention was observed".to_string()); + } + if outcome.reads == 0 { + return Err("[ConcurrentRelationalDb] no read sections were observed".to_string()); + } + Ok(()) + } +} + +fn collect_rows_in_tx( + db: &RelationalDB, + table_id: TableId, + tx: &RelTx, + label: &'static str, +) -> Result, String> { + let mut rows = db + .iter(tx, table_id) + .map_err(|err| format!("{label} failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) +} + +fn expected_rows_from_events(events: &[RoundEvent]) -> Vec { + let mut commits = events + .iter() + .filter_map(|event| match event { + RoundEvent::Committed { + tx_offset, mutations, .. 
+ } => Some((*tx_offset, mutations)), + _ => None, + }) + .collect::>(); + commits.sort_by_key(|(tx_offset, _)| *tx_offset); + + let mut rows = BTreeMap::::new(); + for (_tx_offset, mutations) in commits { + for mutation in mutations { + match mutation { + ConcurrentMutation::Inserted(row) => { + if let Some(id) = row.id() { + rows.insert(id, row.clone()); + } + } + ConcurrentMutation::Deleted(row) => { + if let Some(id) = row.id() { + rows.remove(&id); + } + } + } + } + } + rows.into_values().collect() +} + +fn install_concurrent_schema(db: &RelationalDB) -> anyhow::Result { + let mut tx = db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + let table_id = db.create_table( + &mut tx, + TableSchema::new( + TableId::SENTINEL, + TableName::for_test("concurrent_rows"), + None, + vec![ + ColumnSchema::for_test(0, "id", spacetimedb_sats::AlgebraicType::U64), + ColumnSchema::for_test(1, "value", spacetimedb_sats::AlgebraicType::U64), + ], + vec![IndexSchema::for_test("concurrent_rows_id_idx", BTreeAlgorithm::from(0))], + vec![ConstraintSchema::unique_for_test("concurrent_rows_id_unique", 0)], + vec![], + StTableType::User, + StAccess::Public, + None, + Some(0.into()), + false, + None, + ), + )?; + let _ = db.commit_tx(tx)?; + Ok(table_id) +} + +fn client(index: usize) -> SessionId { + SessionId::from_index(index) +} + +fn is_unique_constraint_violation(err: &DBError) -> bool { + err.to_string().contains("Unique") || err.to_string().contains("unique") +} + +impl fmt::Display for RoundEvent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Action { name, .. 
} => write!(f, "action({name})"), + event => write!(f, "{event:?}"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::sim; + + #[test] + fn seed_12_exercises_lock_state_machine() { + let seed = DstSeed(12); + let config = RunConfig::with_max_interactions(100); + let mut runtime = sim::Runtime::new(seed).unwrap(); + + let outcome = runtime.block_on(run_generated_with_config(seed, config)).unwrap(); + + assert_eq!(outcome.rounds, 100); + assert!(outcome.committed > 0); + assert!(outcome.writer_conflicts > 0); + assert!(outcome.reader_conflicts > 0); + assert!(outcome.reads > 0); + assert_eq!(outcome.final_rows, outcome.expected_rows); + } + + #[test] + fn first_four_rounds_cover_core_lock_cases() { + let seed = DstSeed(12); + let config = RunConfig::with_max_interactions(4); + let mut runtime = sim::Runtime::new(seed).unwrap(); + + let outcome = runtime.block_on(run_generated_with_config(seed, config)).unwrap(); + + assert_eq!(outcome.rounds, 4); + assert_eq!(outcome.writer_conflicts, 1); + assert_eq!(outcome.reader_conflicts, 1); + assert!(outcome.reads >= 4); + assert_eq!(outcome.final_rows, outcome.expected_rows); + } +} diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index 7ee5ba00601..dec276060b2 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -66,10 +66,6 @@ impl<'a> ScenarioPlanner<'a> { Percent::new(percent).sample(self.rng) } - pub fn connection_count(&self) -> usize { - self.model.connections.len() - } - pub fn active_writer(&self) -> Option { self.model.active_writer() } @@ -121,12 +117,15 @@ impl<'a> ScenarioPlanner<'a> { .sample(self.rng) { TxControlAction::Begin - if !self.model.connections[conn.as_index()].in_tx - && !self.model.has_read_tx(conn) - && self.model.active_writer().is_none() => + if !self.model.connections[conn.as_index()].in_tx && !self.model.has_read_tx(conn) => { - 
self.model.begin_tx(conn); - self.pending.push_back(TableWorkloadInteraction::begin_tx(conn)); + if self.model.active_writer().is_none() && !self.model.any_read_tx() { + self.model.begin_tx(conn); + self.pending.push_back(TableWorkloadInteraction::begin_tx(conn)); + } else { + self.pending + .push_back(TableWorkloadInteraction::begin_tx_conflict(conn)); + } true } TxControlAction::Commit if self.model.connections[conn.as_index()].in_tx => { @@ -251,26 +250,14 @@ impl TableWorkloadSource { return; } - // Locking targets allow only one writer at a time. If a writer is - // already open, keep driving that same connection until it commits or - // rolls back. Otherwise pick a fresh connection uniformly. - let conn = if let Some(active_writer) = self.model.active_writer() { - active_writer - } else if let Some(read_conn) = (0..self.num_connections) - .map(SessionId::from_index) - .find(|&conn| self.model.has_read_tx(conn)) - { - // The current RelationalDB target can block when a write transaction - // starts behind an open read transaction. Keep driving the snapshot - // holder until it releases; interleaved read/write snapshots should - // be reintroduced once the target models that lock behavior. - read_conn - } else { - ConnectionChoice { - connection_count: self.num_connections, - } - .sample(&mut self.rng) - }; + // Transactions stay open across interactions, but each API call is a + // separate synchronous step. Always choose a connection uniformly so + // later steps can naturally observe lock contention instead of the + // planner steering around open readers or writers. 
+ let conn = ConnectionChoice { + connection_count: self.num_connections, + } + .sample(&mut self.rng); let mut planner = ScenarioPlanner { rng: &mut self.rng, model: &mut self.model, diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs index 41a007c9930..211dc55a4ea 100644 --- a/crates/dst/src/workload/table_ops/model.rs +++ b/crates/dst/src/workload/table_ops/model.rs @@ -278,7 +278,9 @@ impl TableOracle { if self.connections[conn.as_index()].in_tx { return Err(format!("connection {conn} already has open write tx")); } - if self.active_writer.is_some() { + if self.active_writer.is_some() + || self.connections.iter().any(|connection| connection.read_snapshot.is_some()) + { return Ok(PredictedOutcome::Error { kind: TableErrorKind::WriteConflict, subject: None, diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs index e0b6ef3eecf..49c96f150a9 100644 --- a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -30,7 +30,6 @@ struct TableWorkloadProfile { begin_read_tx_pct: usize, release_read_tx_pct: usize, empty_tx_pct: usize, - write_conflict_pct: usize, exact_duplicate_insert_pct: usize, unique_key_conflict_insert_pct: usize, add_column_pct: usize, @@ -55,7 +54,6 @@ const RANDOM_CRUD_PROFILE: TableWorkloadProfile = TableWorkloadProfile { begin_read_tx_pct: 4, release_read_tx_pct: 35, empty_tx_pct: 2, - write_conflict_pct: 8, exact_duplicate_insert_pct: 4, unique_key_conflict_insert_pct: 4, add_column_pct: 1, @@ -80,7 +78,6 @@ const INDEXED_RANGES_PROFILE: TableWorkloadProfile = TableWorkloadProfile { begin_read_tx_pct: 6, release_read_tx_pct: 30, empty_tx_pct: 2, - write_conflict_pct: 10, exact_duplicate_insert_pct: 3, unique_key_conflict_insert_pct: 4, add_column_pct: 2, @@ -178,13 +175,6 @@ fn fill_pending_with_profile(planner: &mut ScenarioPlanner<'_>, conn: 
SessionId, return; } - if let Some(owner) = planner.active_writer() - && planner.roll_percent(profile.write_conflict_pct) - && emit_write_conflict(planner, owner) - { - return; - } - if planner.active_writer().is_none() { if planner.roll_percent(profile.empty_tx_pct) { let rollback = planner.roll_percent(50); @@ -304,26 +294,6 @@ fn fill_pending_with_profile(planner: &mut ScenarioPlanner<'_>, conn: SessionId, planner.push_interaction(TableWorkloadInteraction::delete(conn, table, row)); } -fn emit_write_conflict(planner: &mut ScenarioPlanner<'_>, owner: SessionId) -> bool { - let candidates = (0..planner.connection_count()) - .map(SessionId::from_index) - .filter(|&conn| conn != owner && !planner.has_read_tx(conn)) - .collect::>(); - if candidates.is_empty() { - return false; - } - let conn = candidates[planner.choose_index(candidates.len())]; - if planner.roll_percent(50) { - planner.push_interaction(TableWorkloadInteraction::begin_tx_conflict(conn)); - return true; - } - - let table = planner.choose_table(); - let row = planner.make_row(table); - planner.push_interaction(TableWorkloadInteraction::write_conflict_insert(conn, table, row)); - true -} - fn emit_add_column(planner: &mut ScenarioPlanner<'_>, conn: SessionId, table: usize) -> bool { const MAX_COLUMNS_PER_TABLE: usize = 12; let column_idx = planner.table_plan(table).columns.len(); From 0b9875fcb717c515cf7f6eb89959df66102aa69e Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 7 May 2026 17:49:40 +0530 Subject: [PATCH 34/74] cleanup --- crates/core/src/db/relational_db.rs | 18 +- crates/core/src/db/update.rs | 2 +- .../src/locking_tx_datastore/datastore.rs | 1 + crates/dst/Cargo.toml | 2 +- crates/dst/src/sim/commitlog.rs | 6 +- .../src/targets/relational_db_commitlog.rs | 20 +- crates/durability/Cargo.toml | 1 + crates/durability/src/imp/local.rs | 73 +++---- crates/durability/src/imp/mod.rs | 123 +----------- crates/durability/tests/io/fallocate.rs | 2 +- crates/runtime/README.md | 182 
++++++++++++++++++ 11 files changed, 250 insertions(+), 180 deletions(-) create mode 100644 crates/runtime/README.md diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 5907a8ddf4c..bc3227f5e82 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -778,6 +778,7 @@ impl RelationalDB { r } + #[cfg(any(feature = "test", test))] #[tracing::instrument(level = "trace", skip_all)] pub fn try_begin_mut_tx(&self, isolation_level: IsolationLevel, workload: Workload) -> Option { log::trace!("TRY BEGIN MUT TX"); @@ -1028,7 +1029,7 @@ impl RelationalDB { Ok(self.inner.alter_table_row_type_mut_tx(tx, table_id, column_schemas)?) } - pub fn add_columns_to_table( + pub(crate) fn add_columns_to_table_mut_tx( &self, tx: &mut MutTx, table_id: TableId, @@ -1040,6 +1041,17 @@ impl RelationalDB { .add_columns_to_table_mut_tx(tx, table_id, column_schemas, default_values)?) } + #[cfg(any(feature = "test", test))] + pub fn add_columns_to_table( + &self, + tx: &mut MutTx, + table_id: TableId, + column_schemas: Vec, + default_values: Vec, + ) -> Result { + self.add_columns_to_table_mut_tx(tx, table_id, column_schemas, default_values) + } + /// Reports the `TxMetrics`s passed. /// /// Should only be called after the tx lock has been fully released. 
@@ -1666,7 +1678,7 @@ pub async fn local_durability( replica_dir: ReplicaDir, snapshot_worker: Option<&SnapshotWorker>, ) -> Result<(LocalDurability, DiskSizeFn), DBError> { - let rt = tokio::runtime::Handle::current(); + let runtime = RuntimeDispatch::tokio_current(); let on_new_segment = snapshot_worker.map(|snapshot_worker| { let snapshot_worker = snapshot_worker.clone(); Arc::new(move || { @@ -1678,7 +1690,7 @@ pub async fn local_durability( let local = asyncify(move || { durability::Local::open( replica_dir.clone(), - rt, + runtime, <_>::default(), // Give the durability a handle to request a new snapshot run, // which it will send down whenever we rotate commitlog segments. diff --git a/crates/core/src/db/update.rs b/crates/core/src/db/update.rs index 6c7c3bd9fc8..f9ca4c110d9 100644 --- a/crates/core/src/db/update.rs +++ b/crates/core/src/db/update.rs @@ -317,7 +317,7 @@ fn auto_migrate_database( .iter() .filter_map(|col_def| col_def.default_value.clone()) .collect(); - stdb.add_columns_to_table(tx, table_id, column_schemas, default_values)?; + stdb.add_columns_to_table_mut_tx(tx, table_id, column_schemas, default_values)?; } spacetimedb_schema::auto_migrate::AutoMigrateStep::DisconnectAllUsers => { log!(logger, "Disconnecting all users"); diff --git a/crates/datastore/src/locking_tx_datastore/datastore.rs b/crates/datastore/src/locking_tx_datastore/datastore.rs index fd1da554c88..bd6439d8030 100644 --- a/crates/datastore/src/locking_tx_datastore/datastore.rs +++ b/crates/datastore/src/locking_tx_datastore/datastore.rs @@ -946,6 +946,7 @@ impl MutTx for Locking { } impl Locking { + #[cfg(any(feature = "test", test))] pub fn try_begin_mut_tx(&self, _isolation_level: IsolationLevel, workload: Workload) -> Option { let metrics = ExecutionMetrics::default(); let ctx = ExecutionContext::with_workload(self.database_identity, workload); diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index 4a30f6d6a6d..add6ccd36ad 100644 --- a/crates/dst/Cargo.toml +++ 
b/crates/dst/Cargo.toml @@ -19,7 +19,7 @@ anyhow.workspace = true clap.workspace = true futures-util.workspace = true spacetimedb-datastore = { workspace = true, features = ["test"] } -spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.2.0" } +spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.2.0", features = ["test"] } spacetimedb-commitlog = { workspace = true, features = ["test"] } spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.2.0", features = ["test"] } spacetimedb-lib.workspace = true diff --git a/crates/dst/src/sim/commitlog.rs b/crates/dst/src/sim/commitlog.rs index a7d98bcc852..eead1b859f7 100644 --- a/crates/dst/src/sim/commitlog.rs +++ b/crates/dst/src/sim/commitlog.rs @@ -7,7 +7,7 @@ use std::{ use spacetimedb_commitlog::{ repo::{Repo, RepoWithoutLockFile, SegmentLen, SegmentReader, TxOffset, TxOffsetIndex, TxOffsetIndexMut}, - segment::FileLike, + segment::{FileLike, Header}, }; use crate::{ @@ -69,11 +69,11 @@ impl Repo for FaultableRepo { type SegmentWriter = FaultableSegment; type SegmentReader = FaultableReader; - fn create_segment(&self, offset: u64) -> io::Result { + fn create_segment(&self, offset: u64, header: Header) -> io::Result { self.faults.maybe_latency(); self.faults.maybe_error(StorageFaultKind::Open)?; self.inner - .create_segment(offset) + .create_segment(offset, header) .map(|inner| FaultableSegment::new(inner, self.faults.clone())) } diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 5487befc8ef..4d6e0ce5656 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -12,7 +12,7 @@ use spacetimedb_datastore::{ execution_context::Workload, traits::{IsolationLevel, Program}, }; -use spacetimedb_durability::{DirectLocal, Durability, EmptyHistory}; +use spacetimedb_durability::{Durability, 
EmptyHistory, Local}; use spacetimedb_lib::{ db::auth::{StAccess, StTableType}, Identity, @@ -427,8 +427,12 @@ impl RelationalDbEngine { fn reopen_from_history(&self) -> Result { let durability = Arc::new( - InMemoryCommitlogDurability::open_with_repo(self.commitlog_repo.clone(), self.durability_opts) - .map_err(|err| format!("reopen in-memory durability failed: {err}"))?, + InMemoryCommitlogDurability::open_with_repo( + self.commitlog_repo.clone(), + spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), + self.durability_opts, + ) + .map_err(|err| format!("reopen in-memory durability failed: {err}"))?, ); let durable_offset = durability.durable_tx_offset().last_seen(); let snapshot_restore = self.snapshot_repo.repo_for_restore(durable_offset)?; @@ -1468,7 +1472,7 @@ impl TargetEngine for RelationalDbEngine { type StressCommitlogRepo = FaultableRepo; type StressSnapshotRepo = BuggifiedSnapshotRepo; -type InMemoryCommitlogDurability = DirectLocal; +type InMemoryCommitlogDurability = Local; struct RelationalDbBootstrap { db: RelationalDB, @@ -1492,8 +1496,12 @@ fn bootstrap_relational_db( let snapshot_repo = BuggifiedSnapshotRepo::new(snapshot_fault_config, seed.fork(703))?; let durability_opts = commitlog_stress_options(seed.fork(701)); let durability = Arc::new( - InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), durability_opts) - .map_err(|err| anyhow::anyhow!("open in-memory durability failed: {err}"))?, + InMemoryCommitlogDurability::open_with_repo( + commitlog_repo.clone(), + spacetimedb_core::runtime::RuntimeDispatch::simulation_current(), + durability_opts, + ) + .map_err(|err| anyhow::anyhow!("open in-memory durability failed: {err}"))?, ); let snapshot_worker = SnapshotWorker::new( Arc::new(snapshot_repo.clone()), diff --git a/crates/durability/Cargo.toml b/crates/durability/Cargo.toml index 0ea8022fcbe..4eaa3870001 100644 --- a/crates/durability/Cargo.toml +++ b/crates/durability/Cargo.toml @@ -21,6 +21,7 @@ 
scopeguard.workspace = true spacetimedb-commitlog.workspace = true spacetimedb-fs-utils.workspace = true spacetimedb-paths.workspace = true +spacetimedb-runtime = { workspace = true, features = ["tokio"] } spacetimedb-sats.workspace = true thiserror.workspace = true tokio.workspace = true diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index c3a36f8694f..51d89e2e848 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -10,7 +10,6 @@ use std::{ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, trace, warn}; -use scopeguard::ScopeGuard; use spacetimedb_commitlog::{ error, payload::Txdata, @@ -19,11 +18,9 @@ use spacetimedb_commitlog::{ }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; +use spacetimedb_runtime::RuntimeDispatch; use thiserror::Error; -use tokio::{ - sync::watch, - task::{spawn_blocking, JoinHandle}, -}; +use tokio::sync::{oneshot, watch}; use tracing::{instrument, Span}; use crate::{Close, Durability, DurableOffset, History, PreparedTx, TxOffset}; @@ -109,9 +106,9 @@ where /// This is mainly for observability purposes, and can thus be updated with /// relaxed memory ordering. queue_depth: Arc, - /// [JoinHandle] for the actor task. Contains `None` if already cancelled - /// (via [Durability::close]). - actor: Mutex>>, + /// Completion notification for the background actor. Contains `None` once + /// consumed by [`Durability::close`]. + actor_done: Mutex>>, } impl Local { @@ -119,13 +116,13 @@ impl Local { /// /// `replica_dir` must already exist. /// - /// Background tasks are spawned onto the provided tokio runtime. + /// Background tasks are spawned onto the provided runtime. /// /// We will send a message down the `on_new_segment` channel whenever we begin a new commitlog segment. /// This is used to capture a snapshot each new segment. 
pub fn open( replica_dir: ReplicaDir, - rt: tokio::runtime::Handle, + runtime: RuntimeDispatch, opts: Options, on_new_segment: Option>, ) -> Result { @@ -140,7 +137,7 @@ impl Local { opts.commitlog, on_new_segment, )?); - Self::open_inner(clog, rt, opts, Some(lock)) + Self::open_inner(clog, runtime, opts, Some(lock)) } } @@ -151,7 +148,7 @@ where { fn open_inner( clog: Arc, R>>, - rt: tokio::runtime::Handle, + runtime: RuntimeDispatch, opts: Options, lock: Option, ) -> Result { @@ -159,19 +156,17 @@ where let (queue, txdata_rx) = async_channel::bounded(queue_capacity); let queue_depth = Arc::new(AtomicU64::new(0)); let (durable_tx, durable_rx) = watch::channel(clog.max_committed_offset()); - - let actor = rt.spawn( + let (actor_done_tx, actor_done_rx) = oneshot::channel(); + runtime.spawn( Actor { clog: clog.clone(), - durable_offset: durable_tx, queue_depth: queue_depth.clone(), - batch_capacity: opts.batch_capacity, - + runtime: runtime.clone(), _lock: lock, } - .run(txdata_rx), + .run(txdata_rx, actor_done_tx), ); Ok(Self { @@ -179,7 +174,7 @@ where durable_offset: durable_rx, queue, queue_depth, - actor: Mutex::new(Some(actor)), + actor_done: Mutex::new(Some(actor_done_rx)), }) } @@ -195,10 +190,10 @@ where R: RepoWithoutLockFile + Send + Sync + 'static, { /// Create a [`Local`] instance backed by the provided commitlog repo. 
- pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { + pub fn open_with_repo(repo: R, runtime: RuntimeDispatch, opts: Options) -> Result { info!("open local durability"); let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); - Self::open_inner(clog, rt, opts, None) + Self::open_inner(clog, runtime, opts, None) } } @@ -246,6 +241,7 @@ where queue_depth: Arc, batch_capacity: NonZeroUsize, + runtime: RuntimeDispatch, _lock: Option, } @@ -256,7 +252,7 @@ where R: Repo + Send + Sync + 'static, { #[instrument(name = "durability::local::actor", skip_all)] - async fn run(self, transactions_rx: async_channel::Receiver>>) { + async fn run(self, transactions_rx: async_channel::Receiver>>, done: oneshot::Sender<()>) { info!("starting durability actor"); let mut tx_buf = Vec::with_capacity(self.batch_capacity.get()); @@ -280,14 +276,14 @@ where let clog = self.clog.clone(); let ready_len = tx_buf.len(); self.queue_depth.fetch_sub(ready_len as u64, Relaxed); - tx_buf = spawn_blocking(move || -> io::Result>>> { + let runtime = self.runtime.clone(); + tx_buf = runtime.spawn_blocking(move || -> io::Result>>> { for tx in tx_buf.drain(..) 
{ clog.commit([tx.into_transaction()])?; } Ok(tx_buf) }) .await - .expect("commitlog write panicked") .expect("commitlog write failed"); if self.flush_and_sync().await.is_err() { sync_on_exit = false; @@ -304,6 +300,7 @@ where } info!("exiting durability actor"); + let _ = done.send(()); } #[instrument(skip_all)] @@ -317,12 +314,13 @@ where let clog = self.clog.clone(); let span = Span::current(); - spawn_blocking(move || { + let runtime = self.runtime.clone(); + runtime + .spawn_blocking(move || { let _span = span.enter(); clog.flush_and_sync() }) .await - .expect("commitlog flush-and-sync blocking task panicked") .inspect_err(|e| warn!("error flushing commitlog: {e:#}")) .inspect(|maybe_offset| { if let Some(new_offset) = maybe_offset { @@ -355,29 +353,14 @@ where info!("close local durability"); let durable_offset = self.durable_tx_offset(); - let maybe_actor = self.actor.lock().unwrap().take(); - // Abort actor if shutdown future is dropped. - let abort = scopeguard::guard( - maybe_actor.as_ref().map(|join_handle| join_handle.abort_handle()), - |maybe_abort_handle| { - if let Some(abort_handle) = maybe_abort_handle { - warn!("close future dropped, aborting durability actor"); - abort_handle.abort(); - } - }, - ); + let maybe_actor_done = self.actor_done.lock().unwrap().take(); self.queue.close(); async move { - if let Some(actor) = maybe_actor - && let Err(e) = actor.await + if let Some(actor_done) = maybe_actor_done + && actor_done.await.is_err() { - // Will print "durability actor: task was cancelled" - // or "durability actor: task panicked [...]" - warn!("durability actor: {e}"); + warn!("durability actor completion signal dropped"); } - // Don't abort if the actor completed. 
- let _ = ScopeGuard::into_inner(abort); - durable_offset.last_seen() } .boxed() diff --git a/crates/durability/src/imp/mod.rs b/crates/durability/src/imp/mod.rs index 4811f340b5a..77f0998e6f8 100644 --- a/crates/durability/src/imp/mod.rs +++ b/crates/durability/src/imp/mod.rs @@ -2,28 +2,20 @@ pub mod local; pub use local::Local; #[cfg(any(test, feature = "test"))] -pub use testing::{DirectLocal, NoDurability}; +pub use testing::NoDurability; #[cfg(any(test, feature = "test"))] mod testing { use std::{ future, marker::PhantomData, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, Mutex, - }, + sync::atomic::{AtomicBool, Ordering}, }; use futures::FutureExt as _; - use spacetimedb_commitlog::{ - payload::Txdata, - repo::{Repo, RepoWithoutLockFile}, - Commitlog, Encode, - }; use tokio::sync::watch; - use crate::{local, Close, Durability, DurableOffset, History, PreparedTx, TxOffset}; + use crate::{Close, Durability, DurableOffset, PreparedTx, TxOffset}; /// A [`Durability`] impl that sends all transactions into the void. /// @@ -65,117 +57,8 @@ mod testing { } } - /// A commitlog-backed durability implementation that performs writes inline. - /// - /// This is intended for deterministic tests that want to inject their own - /// execution model instead of using [`local::Local`]'s Tokio actor. 
- pub struct DirectLocal - where - R: Repo, - { - clog: Arc, R>>, - durable_offset: watch::Sender>, - closed: AtomicBool, - write_lock: Mutex<()>, - } - - impl DirectLocal - where - T: Encode + Send + Sync + 'static, - R: RepoWithoutLockFile + Send + Sync + 'static, - { - pub fn open_with_repo(repo: R, opts: local::Options) -> Result { - let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); - let (durable_offset, _) = watch::channel(clog.max_committed_offset()); - Ok(Self { - clog, - durable_offset, - closed: AtomicBool::new(false), - write_lock: Mutex::new(()), - }) - } - - pub fn as_history(&self) -> impl History> + use { - self.clog.clone() - } - } - - impl DirectLocal - where - T: Encode + Send + Sync + 'static, - R: Repo + Send + Sync + 'static, - { - fn flush_and_publish(&self) -> Option { - let offset = self - .clog - .flush_and_sync() - .expect("direct local durability: commitlog flush-and-sync failed"); - if let Some(offset) = offset { - self.durable_offset.send_modify(|val| { - val.replace(offset); - }); - } - self.durable_offset.borrow().as_ref().copied() - } - } - - impl Durability for DirectLocal - where - T: Encode + Send + Sync + 'static, - R: Repo + Send + Sync + 'static, - { - type TxData = Txdata; - - fn append_tx(&self, tx: PreparedTx) { - if self.closed.load(Ordering::Relaxed) { - panic!("`close` was called on this `DirectLocal` instance"); - } - let _guard = self.write_lock.lock().expect("direct local durability lock poisoned"); - self.clog - .commit([tx.into_transaction()]) - .expect("direct local durability: commitlog write failed"); - self.flush_and_publish(); - } - - fn durable_tx_offset(&self) -> DurableOffset { - self.durable_offset.subscribe().into() - } - - fn close(&self) -> Close { - self.closed.store(true, Ordering::Relaxed); - let _guard = self.write_lock.lock().expect("direct local durability lock poisoned"); - future::ready(self.flush_and_publish()).boxed() - } - } - #[cfg(test)] mod tests { - use 
futures::FutureExt as _; - use spacetimedb_commitlog::repo::Memory; - use spacetimedb_sats::ProductValue; - use super::*; - use crate::{Durability, Transaction}; - - #[test] - fn direct_local_publishes_durable_offset_inline() { - let durability = DirectLocal::::open_with_repo( - Memory::new(1024 * 1024), - local::Options::default(), - ) - .unwrap(); - - durability.append_tx(Box::new(Transaction { - offset: 0, - txdata: Txdata { - inputs: None, - outputs: None, - mutations: None, - }, - })); - - assert_eq!(durability.durable_tx_offset().last_seen(), Some(0)); - assert_eq!(durability.close().now_or_never().flatten(), Some(0)); - } } } diff --git a/crates/durability/tests/io/fallocate.rs b/crates/durability/tests/io/fallocate.rs index 64e50faf4cc..be5ee61bc0b 100644 --- a/crates/durability/tests/io/fallocate.rs +++ b/crates/durability/tests/io/fallocate.rs @@ -161,7 +161,7 @@ async fn local_durability( ) -> Result, spacetimedb_durability::local::OpenError> { spacetimedb_durability::Local::open( dir, - tokio::runtime::Handle::current(), + spacetimedb_runtime::RuntimeDispatch::tokio_current(), spacetimedb_durability::local::Options { commitlog: spacetimedb_commitlog::Options { max_segment_size, diff --git a/crates/runtime/README.md b/crates/runtime/README.md new file mode 100644 index 00000000000..f26134ba7bd --- /dev/null +++ b/crates/runtime/README.md @@ -0,0 +1,182 @@ +# spacetimedb-runtime + +`spacetimedb-runtime` is the small runtime abstraction layer shared by core +code and DST. It exists for one reason: code such as durability and +snapshotting needs to spawn work, run blocking sections, and wait with +timeouts, but we want that same code to run on either: + +- real Tokio in production, or +- the deterministic DST simulator in tests. + +The crate keeps that boundary narrow. Most callers should depend on +`RuntimeDispatch` instead of reaching directly for Tokio or simulator internals. 
+ +## Top-level API + +The top-level module in [src/lib.rs](./src/lib.rs) exposes: + +- `RuntimeDispatch` + A small tagged runtime handle with two backends: + - `Tokio(tokio::runtime::Handle)` when the `tokio` feature is enabled + - `Simulation(sim::Handle)` when the `simulation` feature is enabled +- `spawn(...)` + Fire-and-forget task spawning. +- `spawn_blocking(...)` + Run blocking work on the runtime-appropriate backend. + On Tokio this uses `tokio::task::spawn_blocking`. + In simulation this is still scheduled through the simulator so ordering stays + deterministic. +- `timeout(...)` + Runtime-relative timeout handling. + On Tokio this uses `tokio::time::timeout`. + In simulation this uses virtual time from `sim::time`. +- `current_handle_or_new_runtime()` + Tokio convenience for production code that may or may not already be inside a + Tokio runtime. + +The design goal is intentionally modest: this crate is not a general async +framework. It is a compatibility layer for the small set of runtime operations +SpacetimeDB core code actually needs. + +## Features + +The crate has two independent backends: + +- `tokio` + Enables production runtime support and is part of the default feature set. +- `simulation` + Enables the deterministic local simulation runtime used by DST. + +Code can compile with one or both features enabled. `RuntimeDispatch` exposes +only the backends that were actually compiled in. + +## Simulation Modules + +The simulation backend lives under [src/sim](./src/sim). + +### `sim::mod` + +[src/sim/mod.rs](./src/sim/mod.rs) is the façade for the deterministic runtime. +It re-exports the main executor types and keeps the public surface small: + +- `Runtime` + Owns the simulator executor. +- `Handle` + Cloneable access to that executor from spawned tasks. +- `NodeId` + Logical node identifier used to group and pause/resume work. +- `JoinHandle` + Awaitable handle for spawned simulated tasks. 
+- `yield_now` + Cooperative yield point inside the simulator. +- `time` + Virtual time utilities. +- `Rng` and `DecisionSource` + Deterministic randomness primitives. + +It also exposes small helpers such as `advance_time(...)` and +`decision_source(...)`. + +### `sim::executor` + +[src/sim/executor.rs](./src/sim/executor.rs) is the heart of the simulator. + +It provides a single-threaded async executor adapted from madsim's task loop: + +- tasks are stored as `async_task` runnables +- ready work is chosen by a deterministic RNG instead of an OS/runtime scheduler +- node state can be paused and resumed +- a thread-local handle context makes the current simulation runtime accessible + from inside spawned work +- determinism can be checked by replaying the same future twice and comparing + the sequence of scheduler decisions + +Important behavior: + +- `Runtime::block_on(...)` drives the whole simulation +- `Handle::spawn_on(...)` schedules work onto a logical node +- absence of runnable work and absence of future timer wakeups is treated as a + hang, which is exactly what DST wants + +This module is the reason `RuntimeDispatch::Simulation` can behave like a real +runtime without giving up reproducibility. + +### `sim::time` + +[src/sim/time.rs](./src/sim/time.rs) implements virtual time. + +It provides: + +- `now()` + Current simulated time. +- `sleep(duration)` + A future that completes when simulated time reaches the deadline. +- `timeout(duration, future)` + Race a future against simulated time. +- `advance(duration)` + Move time forward explicitly. + +Internally it maintains: + +- a current `Duration` +- timer registrations keyed by deadline +- wakeups for due timers + +The executor uses this module to move time only when necessary, which keeps +tests deterministic and avoids tying correctness to wall-clock behavior. + +### `sim::rng` + +[src/sim/rng.rs](./src/sim/rng.rs) provides deterministic randomness. 
+ +There are two layers: + +- `Rng` + Stateful deterministic RNG used by the executor and runtime internals. +- `DecisionSource` + Small lock-free source for probabilistic choices in test/workload code. + +This module also does two extra jobs: + +- records and checks determinism checkpoints so repeated seeded runs can prove + they took the same execution path +- hooks libc randomness calls such as `getrandom` so code running inside the + simulator sees deterministic randomness instead of ambient system entropy + +That second point matters because reproducibility falls apart quickly if a +dependency reads randomness outside the simulator's control. + +### `sim::system_thread` + +[src/sim/system_thread.rs](./src/sim/system_thread.rs) prevents accidental OS +thread creation while running under simulation. + +On Unix it intercepts `pthread_attr_init` and fails fast if code tries to spawn +real system threads from inside the simulator. That protects determinism and +enforces the intended execution model: simulated tasks should run on the +simulator, not escape onto real threads. + +## How This Crate Is Intended To Be Used + +For core code: + +- accept or store `RuntimeDispatch` +- use `spawn`, `spawn_blocking`, and `timeout` +- avoid embedding raw Tokio assumptions into shared logic + +For production-only code: + +- use `RuntimeDispatch::tokio_current()` or `RuntimeDispatch::tokio(handle)` + +For DST: + +- create `sim::Runtime` +- run the test harness with `Runtime::block_on(...)` +- pass `RuntimeDispatch::simulation_current()` into the code under test + +## Current Scope + +This crate is intentionally narrow. It is not trying to replace Tokio, and it +is not a generic distributed simulator. It currently provides exactly the +runtime seams needed by SpacetimeDB components that must run both in production +and under deterministic simulation. 
From 42e55dc416266c199a0390d21e779609bd68f5c0 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 16:29:22 +0530 Subject: [PATCH 35/74] snapshot abstraction --- crates/commitlog/src/lib.rs | 60 ++++++--- crates/commitlog/src/repo/mod.rs | 11 ++ crates/core/src/db/persistence.rs | 8 +- crates/core/src/db/relational_db.rs | 66 ++++++--- crates/core/src/db/snapshot.rs | 18 +-- crates/core/src/db/update.rs | 2 +- .../src/locking_tx_datastore/datastore.rs | 40 ++++-- crates/durability/src/imp/local.rs | 83 ++++++++++-- crates/snapshot/src/lib.rs | 126 +++++++++++++++++- 9 files changed, 340 insertions(+), 74 deletions(-) diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index 3922f002a84..d80c1fb00b7 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -151,15 +151,22 @@ impl Options { } } -/// The canonical commitlog, backed by on-disk log files. +/// The canonical commitlog API over a repository backend `R`. +/// +/// The default backend is the on-disk filesystem repository +/// [`repo::Fs`], but tests may supply another [`Repo`] +/// implementation. /// /// Records in the log are of type `T`, which canonically is instantiated to /// [`payload::Txdata`]. -pub struct Commitlog { - inner: RwLock>, +pub struct Commitlog +where + R: Repo, +{ + inner: RwLock>, } -impl Commitlog { +impl Commitlog { /// Open the log at root directory `root` with [`Options`]. /// /// The root directory must already exist. @@ -178,7 +185,26 @@ impl Commitlog { root.display() ); } - let inner = commitlog::Generic::open(repo::Fs::new(root, on_new_segment)?, opts)?; + Self::open_with_repo(repo::Fs::new(root, on_new_segment)?, opts) + } + + /// Determine the size on disk of this commitlog. + pub fn size_on_disk(&self) -> io::Result { + let inner = self.inner.read().unwrap(); + inner.repo.size_on_disk() + } +} + +impl Commitlog +where + R: Repo, +{ + /// Open the log in `repo` with [`Options`]. 
+ /// + /// This is useful for tests which provide a repository + /// implementation other than [`repo::Fs`]. + pub fn open_with_repo(repo: R, opts: Options) -> io::Result { + let inner = commitlog::Generic::open(repo, opts)?; Ok(Self { inner: RwLock::new(inner), @@ -307,7 +333,7 @@ impl Commitlog { /// This means that, when this iterator yields an `Err` value, the consumer /// may want to check if the iterator is exhausted (by calling `next()`) /// before treating the `Err` value as an application error. - pub fn commits(&self) -> impl Iterator> + use { + pub fn commits(&self) -> impl Iterator> + use { self.commits_from(0) } @@ -320,7 +346,10 @@ impl Commitlog { /// Note that the first [`StoredCommit`] yielded is the first commit /// containing the given transaction offset, i.e. its `min_tx_offset` may be /// smaller than `offset`. - pub fn commits_from(&self, offset: u64) -> impl Iterator> + use { + pub fn commits_from( + &self, + offset: u64, + ) -> impl Iterator> + use { self.inner.read().unwrap().commits_from(offset) } @@ -374,15 +403,12 @@ impl Commitlog { inner: RwLock::new(inner), }) } - - /// Determine the size on disk of this commitlog. - pub fn size_on_disk(&self) -> io::Result { - let inner = self.inner.read().unwrap(); - inner.repo.size_on_disk() - } } -impl Commitlog { +impl Commitlog +where + R: Repo, +{ /// Write `transactions` to the log. 
/// /// This will store all `transactions` as a single [Commit] @@ -452,10 +478,11 @@ impl Commitlog { pub fn transactions<'a, D>( &self, de: &'a D, - ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T> + ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T, R> where D: Decoder, D::Error: From, + R: 'a, T: 'a, { self.transactions_from(0, de) @@ -471,10 +498,11 @@ impl Commitlog { &self, offset: u64, de: &'a D, - ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T> + ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T, R> where D: Decoder, D::Error: From, + R: 'a, T: 'a, { self.inner.read().unwrap().transactions_from(offset, de) diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 2b54216bad3..51df7accb81 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -144,6 +144,17 @@ pub trait Repo: Clone + fmt::Display { } } +/// Marker for repos that do not require an external lock file. +/// +/// Durability implementations can use this to expose repo-backed opening +/// only for storage backends where skipping the filesystem `db.lock` cannot +/// violate single-writer safety. 
+pub trait RepoWithoutLockFile: Repo {} + +impl RepoWithoutLockFile for &T {} + +impl RepoWithoutLockFile for Memory {} + impl Repo for &T { type SegmentWriter = T::SegmentWriter; type SegmentReader = T::SegmentReader; diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index e837506da38..5b0daa5145c 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -4,7 +4,7 @@ use async_trait::async_trait; use spacetimedb_commitlog::SizeOnDisk; use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; -use spacetimedb_snapshot::SnapshotRepository; +use spacetimedb_snapshot::DynSnapshotRepo; use crate::{messages::control_db::Database, util::asyncify}; @@ -61,9 +61,9 @@ impl Persistence { } } - /// If snapshots are enabled, get the [SnapshotRepository] they are stored in. - pub fn snapshot_repo(&self) -> Option<&SnapshotRepository> { - self.snapshots.as_ref().map(|worker| worker.repo()) + /// If snapshots are enabled, get the [SnapshotRepo] they are stored in. + pub fn snapshot_repo(&self) -> Option> { + self.snapshots.as_ref().map(|worker| worker.snapshot_repo()) } /// Get the [TxOffset] reported as durable by the [Durability] impl. 
diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index d8cd4884bcc..6df38806dca 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -51,7 +51,7 @@ use spacetimedb_schema::schema::{ ColumnSchema, IndexSchema, RowLevelSecuritySchema, Schema, SequenceSchema, TableSchema, }; use spacetimedb_schema::table_name::TableName; -use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotError, SnapshotRepository}; +use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepository}; use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::page_pool::PagePool; use spacetimedb_table::table::{RowRef, TableScanIter}; @@ -235,7 +235,7 @@ impl RelationalDB { /// /// - `snapshot_repo` /// - /// The [`SnapshotRepository`] which stores snapshots of this database. + /// The [`SnapshotRepo`] which stores snapshots of this database. /// This is only meaningful if `history` and `durability` are also supplied. /// If restoring from an existing database, the `snapshot_repo` must /// store views of the same sequence of TXes as the `history`. 
@@ -278,9 +278,10 @@ impl RelationalDB { let start_time = std::time::Instant::now(); + let snapshot_repo = persistence.as_ref().and_then(|p| p.snapshot_repo()); let inner = Self::restore_from_snapshot_or_bootstrap( database_identity, - persistence.as_ref().and_then(|p| p.snapshot_repo()), + snapshot_repo.as_deref(), durable_tx_offset, min_commitlog_offset, page_pool, @@ -292,7 +293,7 @@ impl RelationalDB { .snapshot_repo() .map(|repo| repo.database_identity() == database_identity) .unwrap_or(true), - "snapshot repository does not match database identity", + "snapshot repo does not match database identity", ); persistence.set_snapshot_state(inner.committed_state.clone()); } @@ -471,7 +472,7 @@ impl RelationalDB { fn restore_from_snapshot_or_bootstrap( database_identity: Identity, - snapshot_repo: Option<&SnapshotRepository>, + snapshot_repo: Option<&DynSnapshotRepo>, durable_tx_offset: Option, min_commitlog_offset: TxOffset, page_pool: PagePool, @@ -479,7 +480,7 @@ impl RelationalDB { // Try to load the `ReconstructedSnapshot` at `snapshot_offset`. fn try_load_snapshot( database_identity: &Identity, - snapshot_repo: &SnapshotRepository, + snapshot_repo: &DynSnapshotRepo, snapshot_offset: TxOffset, page_pool: &PagePool, ) -> Result> { @@ -592,11 +593,12 @@ impl RelationalDB { // Invalidate the snapshot if the error is permanent. // Newly created snapshots should not depend on it. if !is_transient_error(&e) { - let path = snapshot_repo.snapshot_dir_path(snapshot_offset); - log::info!("invalidating bad snapshot at {}", path.display()); - path.rename_invalid().map_err(|e| RestoreSnapshotError::Invalidate { - offset: snapshot_offset, - source: Box::new(e.into()), + log::info!("invalidating bad snapshot at {snapshot_offset}"); + snapshot_repo.invalidate_snapshot(snapshot_offset).map_err(|e| { + RestoreSnapshotError::Invalidate { + offset: snapshot_offset, + source: Box::new(e), + } })?; } // Try the next older one if the error was transient. 
@@ -612,7 +614,7 @@ impl RelationalDB { } } } - log::info!("[{database_identity}] DATABASE: no usable snapshot on disk"); + log::info!("[{database_identity}] DATABASE: no usable snapshot in snapshot repo"); // If we didn't find a snapshot and the commitlog doesn't start at the // zero-th commit (e.g. due to archiving), there is no way to restore @@ -769,6 +771,19 @@ impl RelationalDB { r } + #[cfg(any(feature = "test", test))] + #[tracing::instrument(level = "trace", skip_all)] + pub fn try_begin_mut_tx(&self, isolation_level: IsolationLevel, workload: Workload) -> Option { + log::trace!("TRY BEGIN MUT TX"); + let r = self.inner.try_begin_mut_tx(isolation_level, workload); + if r.is_some() { + log::trace!("ACQUIRED MUT TX"); + } else { + log::trace!("MUT TX CONTENDED"); + } + r + } + #[tracing::instrument(level = "trace", skip_all)] pub fn begin_tx(&self, workload: Workload) -> Tx { log::trace!("BEGIN TX"); @@ -1007,7 +1022,7 @@ impl RelationalDB { Ok(self.inner.alter_table_row_type_mut_tx(tx, table_id, column_schemas)?) } - pub(crate) fn add_columns_to_table( + pub(crate) fn add_columns_to_table_mut_tx( &self, tx: &mut MutTx, table_id: TableId, @@ -1019,6 +1034,17 @@ impl RelationalDB { .add_columns_to_table_mut_tx(tx, table_id, column_schemas, default_values)?) } + #[cfg(any(feature = "test", test))] + pub fn add_columns_to_table( + &self, + tx: &mut MutTx, + table_id: TableId, + column_schemas: Vec, + default_values: Vec, + ) -> Result { + self.add_columns_to_table_mut_tx(tx, table_id, column_schemas, default_values) + } + /// Reports the `TxMetrics`s passed. /// /// Should only be called after the tx lock has been fully released. 
@@ -1777,7 +1803,6 @@ pub mod tests_utils { use spacetimedb_fs_utils::compression::CompressType; use spacetimedb_lib::{bsatn::to_vec, ser::Serialize}; use spacetimedb_paths::server::ReplicaDir; - use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_paths::FromPathUnchecked; use tempfile::TempDir; @@ -2091,7 +2116,7 @@ pub mod tests_utils { Arc::new(|_, _| i64::MAX) } - pub fn take_snapshot(&self, repo: &SnapshotRepository) -> Result, DBError> { + pub fn take_snapshot(&self, repo: &DynSnapshotRepo) -> Result, DBError> { Ok(self.inner.take_snapshot(repo)?) } } @@ -3661,7 +3686,7 @@ mod tests { let repo = open_snapshot_repo(dir, Identity::ZERO, 0)?; RelationalDB::restore_from_snapshot_or_bootstrap( Identity::ZERO, - Some(&repo), + Some(repo.as_ref()), Some(last_compress), 0, PagePool::new_for_test(), @@ -3689,8 +3714,13 @@ mod tests { ); let last = repo.latest_snapshot()?; - let stdb = - RelationalDB::restore_from_snapshot_or_bootstrap(identity, Some(&repo), last, 0, PagePool::new_for_test())?; + let stdb = RelationalDB::restore_from_snapshot_or_bootstrap( + identity, + Some(repo.as_ref()), + last, + 0, + PagePool::new_for_test(), + )?; let out = TempDir::with_prefix("snapshot_test")?; let dir = SnapshotsPath::from_path_unchecked(out.path()); diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index c47e1d33d2d..26e3d8373cf 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -14,7 +14,7 @@ use prometheus::{Histogram, IntGauge}; use spacetimedb_datastore::locking_tx_datastore::{committed_state::CommittedState, datastore::Locking}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use spacetimedb_snapshot::{CompressionStats, SnapshotRepository}; +use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; use tokio::sync::watch; use crate::{util::asyncify, worker_metrics::WORKER_METRICS}; @@ -60,7 +60,7 @@ impl Compression { pub struct SnapshotWorker { snapshot_created: 
watch::Sender, request_snapshot: mpsc::UnboundedSender, - snapshot_repository: Arc, + snapshot_repository: Arc, } impl SnapshotWorker { @@ -69,7 +69,7 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. - pub fn new(snapshot_repository: Arc, compression: Compression) -> Self { + pub fn new(snapshot_repository: Arc, compression: Compression) -> Self { let database = snapshot_repository.database_identity(); let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); @@ -105,9 +105,9 @@ impl SnapshotWorker { .expect("snapshot worker panicked"); } - /// Get the [SnapshotRepository] this worker is operating on. - pub fn repo(&self) -> &SnapshotRepository { - &self.snapshot_repository + /// Get the snapshot repo this worker is operating on. + pub fn snapshot_repo(&self) -> Arc { + self.snapshot_repository.clone() } /// Request a snapshot to be taken. 
@@ -166,7 +166,7 @@ enum Request { struct SnapshotWorkerActor { snapshot_requests: mpsc::UnboundedReceiver, - snapshot_repo: Arc, + snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, compression: Option, @@ -225,7 +225,7 @@ impl SnapshotWorkerActor { let maybe_snapshot = asyncify(move || { let _timer = inner_timer.start_timer(); - Locking::take_snapshot_internal(&state, &snapshot_repo) + Locking::take_snapshot_internal(&state, snapshot_repo.as_ref()) }) .await .with_context(|| format!("error capturing snapshot of database {}", database_identity))?; @@ -307,7 +307,7 @@ impl CompressionMetrics { } struct Compressor { - snapshot_repo: Arc, + snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, } diff --git a/crates/core/src/db/update.rs b/crates/core/src/db/update.rs index 6c7c3bd9fc8..f9ca4c110d9 100644 --- a/crates/core/src/db/update.rs +++ b/crates/core/src/db/update.rs @@ -317,7 +317,7 @@ fn auto_migrate_database( .iter() .filter_map(|col_def| col_def.default_value.clone()) .collect(); - stdb.add_columns_to_table(tx, table_id, column_schemas, default_values)?; + stdb.add_columns_to_table_mut_tx(tx, table_id, column_schemas, default_values)?; } spacetimedb_schema::auto_migrate::AutoMigrateStep::DisconnectAllUsers => { log!(logger, "Disconnecting all users"); diff --git a/crates/datastore/src/locking_tx_datastore/datastore.rs b/crates/datastore/src/locking_tx_datastore/datastore.rs index edcce91ce5e..e9d67103b16 100644 --- a/crates/datastore/src/locking_tx_datastore/datastore.rs +++ b/crates/datastore/src/locking_tx_datastore/datastore.rs @@ -30,7 +30,6 @@ use spacetimedb_data_structures::map::{HashCollectionExt, HashMap}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::{db::auth::StAccess, metrics::ExecutionMetrics}; use spacetimedb_lib::{ConnectionId, Identity}; -use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_primitives::{ColId, ColList, ConstraintId, IndexId, SequenceId, TableId, ViewId}; use 
spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::{AlgebraicValue, ProductValue}; @@ -39,7 +38,7 @@ use spacetimedb_schema::{ reducer_name::ReducerName, schema::{ColumnSchema, IndexSchema, SequenceSchema, TableSchema}, }; -use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotRepository, UnflushedSnapshot}; +use spacetimedb_snapshot::{BoxedPendingSnapshot, DynSnapshotRepo, ReconstructedSnapshot}; use spacetimedb_table::{ indexes::RowPointer, page_pool::PagePool, @@ -223,11 +222,11 @@ impl Locking { /// (i.e. no transactions have been committed yet) /// and therefore no snapshot was created /// - /// - or `Some` path to the newly created snapshot directory + /// - or `Some` transaction offset for the newly created snapshot /// - /// Returns an error if [`SnapshotRepository::create_snapshot`] returns an + /// Returns an error if [`DynSnapshotRepo::create_snapshot`] returns an /// error. - pub fn take_snapshot(&self, repo: &SnapshotRepository) -> Result> { + pub fn take_snapshot(&self, repo: &DynSnapshotRepo) -> Result> { Self::take_snapshot_internal(&self.committed_state, repo)? 
.map(|(_offset, snap)| snap.sync_all()) .transpose() @@ -241,8 +240,8 @@ impl Locking { pub fn take_snapshot_internal( committed_state: &RwLock, - repo: &SnapshotRepository, - ) -> Result> { + repo: &DynSnapshotRepo, + ) -> Result> { let mut committed_state = committed_state.write(); let Some(tx_offset) = committed_state.next_tx_offset.checked_sub(1) else { return Ok(None); @@ -254,8 +253,8 @@ impl Locking { tx_offset, ); - let (tables, blob_store) = committed_state.persistent_tables_and_blob_store(); - let unflushed_snapshot = repo.create_snapshot(tables, blob_store, tx_offset)?; + let (mut tables, blob_store) = committed_state.persistent_tables_and_blob_store(); + let unflushed_snapshot = repo.create_snapshot(&mut tables, blob_store, tx_offset)?; Ok(Some((tx_offset, unflushed_snapshot))) } @@ -924,6 +923,29 @@ impl MutTx for Locking { } impl Locking { + #[cfg(any(feature = "test", test))] + pub fn try_begin_mut_tx(&self, _isolation_level: IsolationLevel, workload: Workload) -> Option { + let metrics = ExecutionMetrics::default(); + let ctx = ExecutionContext::with_workload(self.database_identity, workload); + + let timer = Instant::now(); + let committed_state_write_lock = self.committed_state.try_write_arc()?; + let sequence_state_lock = self.sequence_state.try_lock_arc()?; + let lock_wait_time = timer.elapsed(); + + Some(MutTxId { + committed_state_write_lock, + sequence_state_lock, + tx_state: TxState::default(), + lock_wait_time, + read_sets: <_>::default(), + timer, + ctx, + metrics, + _not_send: std::marker::PhantomData, + }) + } + pub fn rollback_mut_tx_downgrade(&self, tx: MutTxId, workload: Workload) -> (TxMetrics, TxId) { tx.rollback_downgrade(workload) } diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 3bf1921e8a8..5cc03099ab6 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -11,7 +11,12 @@ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, 
trace, warn}; use scopeguard::ScopeGuard; -use spacetimedb_commitlog::{error, payload::Txdata, Commit, Commitlog, Decoder, Encode, Transaction}; +use spacetimedb_commitlog::{ + error, + payload::Txdata, + repo::{Fs, Repo, RepoWithoutLockFile}, + Commit, Commitlog, Decoder, Encode, Transaction, +}; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; use thiserror::Error; @@ -83,9 +88,12 @@ pub enum OpenError { /// /// Note, however, that instantiating `T` to a different type may require to /// change the log format version! -pub struct Local { +pub struct Local +where + R: Repo, +{ /// The [`Commitlog`] this [`Durability`] and [`History`] impl wraps. - clog: Arc>>, + clog: Arc, R>>, /// The durable transaction offset, as reported by the background /// [`FlushAndSyncTask`]. durable_offset: watch::Receiver>, @@ -106,7 +114,7 @@ pub struct Local { actor: Mutex>>, } -impl Local { +impl Local { /// Create a [`Local`] instance at the `replica_dir`. /// /// `replica_dir` must already exist. @@ -132,6 +140,21 @@ impl Local { opts.commitlog, on_new_segment, )?); + Self::open_inner(clog, rt, opts, Some(lock)) + } +} + +impl Local +where + T: Encode + Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ + fn open_inner( + clog: Arc, R>>, + rt: tokio::runtime::Handle, + opts: Options, + lock: Option, + ) -> Result { let queue_capacity = opts.queue_capacity(); let (queue, txdata_rx) = async_channel::bounded(queue_capacity); let queue_depth = Arc::new(AtomicU64::new(0)); @@ -161,12 +184,29 @@ impl Local { } /// Obtain a read-only copy of the durable state that implements [History]. - pub fn as_history(&self) -> impl History> + use { + pub fn as_history(&self) -> impl History> + use { self.clog.clone() } } -impl Local { +impl Local +where + T: Encode + Send + Sync + 'static, + R: RepoWithoutLockFile + Send + Sync + 'static, +{ + /// Create a [`Local`] instance backed by the provided commitlog repo. 
+ pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { + info!("open local durability"); + let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); + Self::open_inner(clog, rt, opts, None) + } +} + +impl Local +where + T: Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ /// Inspect how many transactions added via [`Self::append_tx`] are pending /// to be applied to the underlying [`Commitlog`]. pub fn queue_depth(&self) -> u64 { @@ -174,7 +214,7 @@ impl Local { } /// Obtain an iterator over the [`Commit`]s in the underlying log. - pub fn commits_from(&self, offset: TxOffset) -> impl Iterator> + use { + pub fn commits_from(&self, offset: TxOffset) -> impl Iterator> + use { self.clog.commits_from(offset).map_ok(Commit::from) } @@ -187,15 +227,20 @@ impl Local { pub fn compress_segments(&self, offsets: &[TxOffset]) -> io::Result<()> { self.clog.compress_segments(offsets) } +} +impl Local { /// Get the size on disk of the underlying [`Commitlog`]. 
pub fn size_on_disk(&self) -> io::Result { self.clog.size_on_disk() } } -struct Actor { - clog: Arc>>, +struct Actor +where + R: Repo, +{ + clog: Arc, R>>, durable_offset: watch::Sender>, queue_depth: Arc, @@ -203,10 +248,14 @@ struct Actor { batch_capacity: NonZeroUsize, #[allow(unused)] - lock: LockedFile, + lock: Option, } -impl Actor { +impl Actor +where + T: Encode + Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ #[instrument(name = "durability::local::actor", skip_all)] async fn run(self, transactions_rx: async_channel::Receiver>>) { info!("starting durability actor"); @@ -287,7 +336,11 @@ impl Actor { } } -impl Durability for Local { +impl Durability for Local +where + T: Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ type TxData = Txdata; fn append_tx(&self, tx: PreparedTx) { @@ -332,7 +385,11 @@ impl Durability for Local { } } -impl History for Commitlog> { +impl History for Commitlog, R> +where + T: Encode + 'static, + R: Repo + Send + Sync + 'static, +{ type TxData = Txdata; fn fold_transactions_from(&self, offset: TxOffset, decoder: D) -> Result<(), D::Error> diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index 66c25ed824a..66bc3815af8 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -46,7 +46,7 @@ use spacetimedb_table::{ }; use std::fs::{self, File}; use std::io; -use std::ops::RangeBounds; +use std::ops::{Range, RangeBounds}; use std::path::Path; use std::time::{Duration, Instant}; use std::{ @@ -206,6 +206,11 @@ pub struct UnflushedSnapshot { } impl UnflushedSnapshot { + /// Return the transaction offset this pending snapshot will finalize at. + pub fn tx_offset(&self) -> TxOffset { + self.inner.as_ref().unwrap().snapshot.tx_offset + } + /// Sync all objects in the snapshot and write out the snapshot file. /// /// Returns the [SnapshotDirPath] on success. 
@@ -261,6 +266,28 @@ impl UnflushedSnapshotInner { } } +pub trait PendingSnapshot: Send { + /// Sync all snapshot state and return the finalized transaction offset. + fn sync_all(self: Box) -> Result; +} + +pub type BoxedPendingSnapshot = Box; +pub type DynSnapshotRepo = dyn SnapshotRepo; + +impl PendingSnapshot for BoxedPendingSnapshot { + fn sync_all(self: Box) -> Result { + (*self).sync_all() + } +} + +impl PendingSnapshot for UnflushedSnapshot { + fn sync_all(self: Box) -> Result { + let tx_offset = self.tx_offset(); + UnflushedSnapshot::sync_all(*self)?; + Ok(tx_offset) + } +} + #[derive(Clone, Serialize, Deserialize)] pub struct Snapshot { /// A magic number: must be equal to [`MAGIC`]. @@ -1139,13 +1166,19 @@ impl SnapshotRepository { .collect::>(); for newer_snapshot in newer_snapshots { - let path = self.snapshot_dir_path(newer_snapshot); - log::info!("Renaming snapshot newer than {upper_bound} from {path:?} to {path:?}"); - path.rename_invalid()?; + self.invalidate_snapshot(newer_snapshot)?; } Ok(()) } + /// Mark a single snapshot invalid so it will not be considered for future + /// restores. + pub fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + let path = self.snapshot_dir_path(tx_offset); + log::info!("Renaming snapshot {tx_offset} from {path:?} to invalid"); + path.rename_invalid().map_err(Into::into) + } + /// Compress the `current` snapshot, unless it is already compressed. /// /// If a `parent` snapshot is given, its object repo will be used to @@ -1329,6 +1362,91 @@ impl SnapshotRepository { } } +/// Snapshot storage backend. +pub trait SnapshotRepo: Send + Sync { + type Pending: PendingSnapshot; + + /// Return the database identity associated with this snapshot backend. + fn database_identity(&self) -> Identity; + + /// Start creating a snapshot at `tx_offset` from the provided tables and blob store. 
+ fn create_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result; + + /// Reconstruct the snapshot at `tx_offset` using the supplied page pool. + fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result; + + /// Return the latest snapshot at or before `upper_bound`, if one exists. + fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError>; + + /// Return the latest snapshot in this backend, if one exists. + fn latest_snapshot(&self) -> Result, SnapshotError> { + self.latest_snapshot_older_than(TxOffset::MAX) + } + + /// Attempt to compress all snapshots that fall into `range`, and record + /// the outcome in `stats`. + /// + /// The snapshots in `range` are traversed in ascending order. + /// If an error occurs, processing stops and the error is returned. + /// + /// See [CompressionStats] for how to interpret the results. + fn compress_snapshots(&self, stats: &mut CompressionStats, range: Range) -> Result<(), SnapshotError>; + + /// Invalidate every snapshot newer than `upper_bound`. + fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError>; + + /// Invalidate the snapshot at `tx_offset`. 
+ fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError>; +} + +impl SnapshotRepo for SnapshotRepository { + type Pending = BoxedPendingSnapshot; + + fn database_identity(&self) -> Identity { + SnapshotRepository::database_identity(self) + } + + fn create_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + Ok(Box::new(SnapshotRepository::create_snapshot( + self, tables, blobs, tx_offset, + )?)) + } + + fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result { + SnapshotRepository::read_snapshot(self, tx_offset, page_pool) + } + + fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError> { + SnapshotRepository::latest_snapshot_older_than(self, upper_bound) + } + + fn latest_snapshot(&self) -> Result, SnapshotError> { + SnapshotRepository::latest_snapshot(self) + } + + fn compress_snapshots(&self, stats: &mut CompressionStats, range: Range) -> Result<(), SnapshotError> { + SnapshotRepository::compress_snapshots(self, stats, range) + } + + fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { + SnapshotRepository::invalidate_newer_snapshots(self, upper_bound) + } + + fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + SnapshotRepository::invalidate_snapshot(self, tx_offset) + } +} + pub struct ReconstructedSnapshot { /// The identity of the snapshotted database. 
pub database_identity: Identity, From f508a0462f806452daa7cf913ca34aefc684a332 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 17:05:14 +0530 Subject: [PATCH 36/74] lint --- crates/commitlog/src/repo/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 51df7accb81..358936c3c2a 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -153,6 +153,7 @@ pub trait RepoWithoutLockFile: Repo {} impl RepoWithoutLockFile for &T {} +#[cfg(any(test, feature = "test"))] impl RepoWithoutLockFile for Memory {} impl Repo for &T { From 5356b8186b3102efb41ab401e9f5de805f4bf391 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 17:13:02 +0530 Subject: [PATCH 37/74] Add runtime crate and RuntimeDispatch integration --- Cargo.lock | 37 +- Cargo.toml | 4 +- crates/core/Cargo.toml | 2 + crates/core/src/database_logger.rs | 6 +- crates/core/src/db/durability.rs | 15 +- crates/core/src/db/persistence.rs | 29 +- crates/core/src/db/relational_db.rs | 48 +- crates/core/src/db/snapshot.rs | 97 ++- crates/core/src/lib.rs | 1 + crates/core/src/runtime.rs | 3 + .../subscription/module_subscription_actor.rs | 2 +- crates/durability/Cargo.toml | 1 + crates/durability/src/imp/local.rs | 78 +- crates/durability/src/imp/mod.rs | 5 + crates/durability/tests/io/fallocate.rs | 2 +- crates/runtime/Cargo.toml | 24 + crates/runtime/LICENSE | 731 ++++++++++++++++++ crates/runtime/README.md | 182 +++++ crates/runtime/src/lib.rs | 122 +++ crates/runtime/src/sim/executor.rs | 589 ++++++++++++++ crates/runtime/src/sim/mod.rs | 23 + crates/runtime/src/sim/rng.rs | 367 +++++++++ crates/runtime/src/sim/system_thread.rs | 64 ++ crates/runtime/src/sim/time.rs | 343 ++++++++ crates/standalone/Cargo.toml | 2 +- crates/standalone/src/subcommands/start.rs | 97 ++- 26 files changed, 2721 insertions(+), 153 deletions(-) create mode 100644 crates/core/src/runtime.rs create mode 
100644 crates/runtime/Cargo.toml create mode 100644 crates/runtime/LICENSE create mode 100644 crates/runtime/README.md create mode 100644 crates/runtime/src/lib.rs create mode 100644 crates/runtime/src/sim/executor.rs create mode 100644 crates/runtime/src/sim/mod.rs create mode 100644 crates/runtime/src/sim/rng.rs create mode 100644 crates/runtime/src/sim/system_thread.rs create mode 100644 crates/runtime/src/sim/time.rs diff --git a/Cargo.lock b/Cargo.lock index 24761b41064..a0193647eae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -34,7 +34,7 @@ version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.13", "once_cell", "version_check", ] @@ -276,6 +276,12 @@ dependencies = [ "syn 2.0.107", ] +[[package]] +name = "async-task" +version = "4.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" + [[package]] name = "async-trait" version = "0.1.89" @@ -2551,9 +2557,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "a06fddc2749e0528d2813f95e050e87e52c8cbbae56223b9babf73b3e53b0cc6" dependencies = [ "cfg-if", "js-sys", @@ -6113,7 +6119,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.13", ] [[package]] @@ -6187,7 +6193,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.13", "libredox", 
"thiserror 1.0.69", ] @@ -6416,7 +6422,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.13", "libc", "untrusted", "windows-sys 0.52.0", @@ -7649,7 +7655,7 @@ checksum = "db18cb19c7499ba4a65b1504442179a7e4aba487dc35978d90966c5ca02ee16b" dependencies = [ "bytemuck", "derive_more 0.99.20", - "getrandom 0.2.16", + "getrandom 0.2.13", "log", "rand 0.8.5", "scoped-tls", @@ -7668,7 +7674,7 @@ dependencies = [ "bytemuck", "bytes", "derive_more 0.99.20", - "getrandom 0.2.16", + "getrandom 0.2.13", "http 1.3.1", "insta", "log", @@ -8093,6 +8099,7 @@ dependencies = [ "spacetimedb-physical-plan", "spacetimedb-primitives 2.2.0", "spacetimedb-query", + "spacetimedb-runtime", "spacetimedb-sats 2.2.0", "spacetimedb-schema", "spacetimedb-snapshot", @@ -8190,6 +8197,7 @@ dependencies = [ "spacetimedb-commitlog", "spacetimedb-fs-utils", "spacetimedb-paths", + "spacetimedb-runtime", "spacetimedb-sats 2.2.0", "tempfile", "thiserror 1.0.69", @@ -8458,6 +8466,19 @@ dependencies = [ "spacetimedb-lib 2.2.0", ] +[[package]] +name = "spacetimedb-runtime" +version = "2.2.0" +dependencies = [ + "anyhow", + "async-task", + "futures", + "futures-util", + "libc", + "tokio", + "tracing", +] + [[package]] name = "spacetimedb-sats" version = "1.9.0" diff --git a/Cargo.toml b/Cargo.toml index 75deef78a3b..f0678e29cc6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ members = [ "crates/physical-plan", "crates/primitives", "crates/query", + "crates/runtime", "crates/sats", "crates/schema", "crates/smoketests", @@ -138,6 +139,7 @@ spacetimedb-pg = { path = "crates/pg", version = "=2.2.0" } spacetimedb-physical-plan = { path = "crates/physical-plan", version = "=2.2.0" } spacetimedb-primitives = { path = "crates/primitives", version = "=2.2.0" } spacetimedb-query = { path = "crates/query", version = "=2.2.0" } +spacetimedb-runtime = { path = "crates/runtime", version = "=2.2.0", 
default-features = false } spacetimedb-sats = { path = "crates/sats", version = "=2.2.0" } spacetimedb-schema = { path = "crates/schema", version = "=2.2.0" } spacetimedb-standalone = { path = "crates/standalone", version = "=2.2.0" } @@ -388,7 +390,7 @@ features = [ ] [workspace.lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)', 'cfg(simulation)'] } [workspace.lints.clippy] # FIXME: we should work on this lint incrementally diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index acdc578080d..2947eccac9d 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -28,6 +28,7 @@ spacetimedb-primitives.workspace = true spacetimedb-paths.workspace = true spacetimedb-physical-plan.workspace = true spacetimedb-query.workspace = true +spacetimedb-runtime = { workspace = true, features = ["tokio"] } spacetimedb-sats = { workspace = true, features = ["serde"] } spacetimedb-schema.workspace = true spacetimedb-table.workspace = true @@ -133,6 +134,7 @@ tikv-jemalloc-ctl = {workspace = true} [target.'cfg(target_os = "linux")'.dependencies] nix = { workspace = true, features = ["sched"] } + [features] # Print a warning when doing an unindexed `iter_by_col_range` on a large table. 
unindexed_iter_by_col_range_warn = [] diff --git a/crates/core/src/database_logger.rs b/crates/core/src/database_logger.rs index 0e202229dea..f194cb60a48 100644 --- a/crates/core/src/database_logger.rs +++ b/crates/core/src/database_logger.rs @@ -11,7 +11,7 @@ use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use tokio::io::{AsyncRead, BufReader}; +use tokio::io::{AsyncRead, BufReader, ReadBuf}; use tokio::sync::{broadcast, mpsc, oneshot}; use tokio_stream::wrappers::errors::BroadcastStreamRecvError; use tokio_stream::wrappers::BroadcastStream; @@ -592,7 +592,7 @@ fn seek_to(file: &mut File, buf: &mut [u8], num_lines: u32) -> io::Result<()> { Ok(()) } -fn read_exact_at(file: &std::fs::File, buf: &mut [u8], offset: u64) -> io::Result<()> { +fn read_exact_at(file: &File, buf: &mut [u8], offset: u64) -> io::Result<()> { #[cfg(unix)] { use std::os::unix::fs::FileExt; @@ -641,7 +641,7 @@ impl MaybeFile { } impl AsyncRead for MaybeFile { - fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut tokio::io::ReadBuf<'_>) -> Poll> { + fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll> { match self.project() { MaybeFileProj::File { inner } => inner.poll_read(cx, buf), MaybeFileProj::Empty => Poll::Ready(Ok(())), diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index c17a10e9f63..3a466d53eb6 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -9,9 +9,8 @@ use spacetimedb_datastore::{execution_context::ReducerContext, traits::TxData}; use spacetimedb_durability::Transaction; use spacetimedb_lib::Identity; use spacetimedb_sats::ProductValue; -use tokio::{runtime, time::timeout}; -use crate::db::persistence::Durability; +use crate::{db::persistence::Durability, runtime::RuntimeDispatch}; pub(super) fn request_durability( durability: &Durability, @@ -32,11 +31,12 @@ pub(super) fn request_durability( })); } -pub(super) fn 
spawn_close(durability: Arc, runtime: &runtime::Handle, database_identity: Identity) { - let rt = runtime.clone(); - rt.spawn(async move { - let label = format!("[{database_identity}]"); - match timeout(Duration::from_secs(10), durability.close()).await { +pub(super) fn spawn_close(durability: Arc, runtime: &RuntimeDispatch, database_identity: Identity) { + let label = format!("[{database_identity}]"); + let runtime = runtime.clone(); + runtime.clone().spawn(async move { + log::info!("starting spawn close"); + match runtime.timeout(Duration::from_secs(10), durability.close()).await { Err(_elapsed) => { error!("{label} timeout waiting for durability shutdown"); } @@ -44,6 +44,7 @@ pub(super) fn spawn_close(durability: Arc, runtime: &runtime::Handle info!("{label} durability shut down at tx offset: {offset:?}"); } } + log::info!("closing spawn close"); }); } diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index 5b0daa5145c..83d58befb06 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -6,7 +6,7 @@ use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; use spacetimedb_snapshot::DynSnapshotRepo; -use crate::{messages::control_db::Database, util::asyncify}; +use crate::{messages::control_db::Database, runtime::RuntimeDispatch, util::asyncify}; use super::{ relational_db::{self, Txdata}, @@ -41,8 +41,8 @@ pub struct Persistence { /// persistent (as opposed to in-memory) databases. This is enforced by /// this type. pub snapshots: Option, - /// The tokio runtime onto which durability-related tasks shall be spawned. - pub runtime: tokio::runtime::Handle, + /// Runtime onto which durability-related tasks shall be spawned. 
+ pub runtime: RuntimeDispatch, } impl Persistence { @@ -52,6 +52,15 @@ impl Persistence { disk_size: impl Fn() -> io::Result + Send + Sync + 'static, snapshots: Option, runtime: tokio::runtime::Handle, + ) -> Self { + Self::new_with_runtime(durability, disk_size, snapshots, RuntimeDispatch::tokio(runtime)) + } + + pub fn new_with_runtime( + durability: impl spacetimedb_durability::Durability + 'static, + disk_size: impl Fn() -> io::Result + Send + Sync + 'static, + snapshots: Option, + runtime: RuntimeDispatch, ) -> Self { Self { durability: Arc::new(durability), @@ -61,7 +70,7 @@ impl Persistence { } } - /// If snapshots are enabled, get the [SnapshotRepo] they are stored in. + /// If snapshots are enabled, get the snapshot repository they are stored in. pub fn snapshot_repo(&self) -> Option> { self.snapshots.as_ref().map(|worker| worker.snapshot_repo()) } @@ -91,7 +100,7 @@ impl Persistence { Option>, Option, Option, - Option, + Option, ) { this.map( |Self { @@ -148,7 +157,13 @@ impl PersistenceProvider for LocalPersistenceProvider { let snapshot_worker = asyncify(move || relational_db::open_snapshot_repo(snapshot_dir, database_identity, replica_id)) .await - .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Enabled))?; + .map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Enabled, + RuntimeDispatch::tokio_current(), + ) + })?; let (durability, disk_size) = relational_db::local_durability(replica_dir, Some(&snapshot_worker)).await?; tokio::spawn(relational_db::snapshot_watching_commitlog_compressor( @@ -162,7 +177,7 @@ impl PersistenceProvider for LocalPersistenceProvider { durability, disk_size, snapshots: Some(snapshot_worker), - runtime: tokio::runtime::Handle::current(), + runtime: RuntimeDispatch::tokio_current(), }) } } diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 6df38806dca..57c7cde59cc 100644 --- a/crates/core/src/db/relational_db.rs +++ 
b/crates/core/src/db/relational_db.rs @@ -1,6 +1,7 @@ use crate::db::durability::{request_durability, spawn_close as spawn_durability_close}; use crate::db::MetricsRecorderQueue; use crate::error::{DBError, RestoreSnapshotError}; +use crate::runtime::RuntimeDispatch; use crate::subscription::ExecutionCounters; use crate::util::asyncify; use crate::worker_metrics::WORKER_METRICS; @@ -41,6 +42,8 @@ use spacetimedb_lib::st_var::StVarValue; use spacetimedb_lib::ConnectionId; use spacetimedb_lib::Identity; use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; +#[cfg(test)] +use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_primitives::*; use spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::raw_identifier::RawIdentifier; @@ -51,7 +54,7 @@ use spacetimedb_schema::schema::{ ColumnSchema, IndexSchema, RowLevelSecuritySchema, Schema, SequenceSchema, TableSchema, }; use spacetimedb_schema::table_name::TableName; -use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepository}; +use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepo, SnapshotRepository}; use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::page_pool::PagePool; use spacetimedb_table::table::{RowRef, TableScanIter}; @@ -99,7 +102,7 @@ pub struct RelationalDB { inner: Locking, durability: Option>, - durability_runtime: Option, + durability_runtime: Option, snapshot_worker: Option, row_count_fn: RowCountFn, @@ -133,10 +136,13 @@ impl std::fmt::Debug for RelationalDB { impl Drop for RelationalDB { fn drop(&mut self) { + log::info!("starting drop"); // Attempt to flush the outstanding transactions. 
if let (Some(durability), Some(runtime)) = (self.durability.take(), self.durability_runtime.take()) { spawn_durability_close(durability, &runtime, self.database_identity); } + + log::info!("drop done"); } } @@ -233,11 +239,12 @@ impl RelationalDB { /// /// `None` may be passed to obtain an in-memory only database. /// - /// - `snapshot_repo` + /// - snapshots /// - /// The [`SnapshotRepo`] which stores snapshots of this database. + /// Optional snapshot persistence and background snapshot execution, + /// carried through [`Persistence`]. /// This is only meaningful if `history` and `durability` are also supplied. - /// If restoring from an existing database, the `snapshot_repo` must + /// If restoring from an existing database, the snapshot repository must /// store views of the same sequence of TXes as the `history`. /// /// - `metrics_recorder_queue` @@ -480,7 +487,7 @@ impl RelationalDB { // Try to load the `ReconstructedSnapshot` at `snapshot_offset`. fn try_load_snapshot( database_identity: &Identity, - snapshot_repo: &DynSnapshotRepo, + snapshot_repo: &(impl SnapshotRepo + ?Sized), snapshot_offset: TxOffset, page_pool: &PagePool, ) -> Result> { @@ -614,7 +621,7 @@ impl RelationalDB { } } } - log::info!("[{database_identity}] DATABASE: no usable snapshot in snapshot repo"); + log::info!("[{database_identity}] DATABASE: no usable snapshot in store"); // If we didn't find a snapshot and the commitlog doesn't start at the // zero-th commit (e.g. 
due to archiving), there is no way to restore @@ -1671,7 +1678,7 @@ pub async fn local_durability( replica_dir: ReplicaDir, snapshot_worker: Option<&SnapshotWorker>, ) -> Result<(LocalDurability, DiskSizeFn), DBError> { - let rt = tokio::runtime::Handle::current(); + let runtime = RuntimeDispatch::tokio_current(); let on_new_segment = snapshot_worker.map(|snapshot_worker| { let snapshot_worker = snapshot_worker.clone(); Arc::new(move || { @@ -1683,7 +1690,7 @@ pub async fn local_durability( let local = asyncify(move || { durability::Local::open( replica_dir.clone(), - rt, + runtime, <_>::default(), // Give the durability a handle to request a new snapshot run, // which it will send down whenever we rotate commitlog segments. @@ -1803,6 +1810,7 @@ pub mod tests_utils { use spacetimedb_fs_utils::compression::CompressType; use spacetimedb_lib::{bsatn::to_vec, ser::Serialize}; use spacetimedb_paths::server::ReplicaDir; + use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_paths::FromPathUnchecked; use tempfile::TempDir; @@ -1950,7 +1958,13 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), db_identity, replica_id) - .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Disabled)) + .map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Disabled, + RuntimeDispatch::tokio(rt.clone()), + ) + }) }) .transpose()?; @@ -1961,7 +1975,7 @@ pub mod tests_utils { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: rt, + runtime: RuntimeDispatch::tokio(rt), }; let (db, _) = RelationalDB::open( @@ -2073,7 +2087,13 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), Identity::ZERO, 0) - .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Disabled)) + .map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Disabled, + RuntimeDispatch::tokio(rt.clone()), + ) + }) 
}) .transpose()?; let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), snapshots.as_ref()))?; @@ -2082,7 +2102,7 @@ pub mod tests_utils { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: rt, + runtime: RuntimeDispatch::tokio(rt), }; let db = Self::open_db(history, Some(persistence), None, 0)?; @@ -2116,7 +2136,7 @@ pub mod tests_utils { Arc::new(|_, _| i64::MAX) } - pub fn take_snapshot(&self, repo: &DynSnapshotRepo) -> Result, DBError> { + pub fn take_snapshot(&self, repo: &SnapshotRepository) -> Result, DBError> { Ok(self.inner.take_snapshot(repo)?) } } diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 26e3d8373cf..dda981a89bd 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -14,10 +14,10 @@ use prometheus::{Histogram, IntGauge}; use spacetimedb_datastore::locking_tx_datastore::{committed_state::CommittedState, datastore::Locking}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; +use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo, SnapshotRepository}; use tokio::sync::watch; -use crate::{util::asyncify, worker_metrics::WORKER_METRICS}; +use crate::{runtime::RuntimeDispatch, worker_metrics::WORKER_METRICS}; pub type SnapshotDatabaseState = Arc>; @@ -60,7 +60,7 @@ impl Compression { pub struct SnapshotWorker { snapshot_created: watch::Sender, request_snapshot: mpsc::UnboundedSender, - snapshot_repository: Arc, + snapshot_repo: Arc, } impl SnapshotWorker { @@ -69,29 +69,26 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. 
- pub fn new(snapshot_repository: Arc, compression: Compression) -> Self { - let database = snapshot_repository.database_identity(); - let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); + pub fn new(snapshot_repo: Arc, runtime: RuntimeDispatch) -> Self { + let database = snapshot_repo.database_identity(); + let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); let (request_tx, request_rx) = mpsc::unbounded(); let actor = SnapshotWorkerActor { snapshot_requests: request_rx, - snapshot_repo: snapshot_repository.clone(), + snapshot_repo: snapshot_repo.clone(), snapshot_created: snapshot_created.clone(), metrics: SnapshotMetrics::new(database), - compression: compression.is_enabled().then(|| Compressor { - snapshot_repo: snapshot_repository.clone(), - metrics: CompressionMetrics::new(database), - stats: <_>::default(), - }), + runtime: runtime.clone(), + compression: None, }; - tokio::spawn(actor.run()); + runtime.spawn(actor.run()); Self { snapshot_created, request_snapshot: request_tx, - snapshot_repository, + snapshot_repo, } } @@ -105,9 +102,9 @@ impl SnapshotWorker { .expect("snapshot worker panicked"); } - /// Get the snapshot repo this worker is operating on. + /// Get the snapshot repository this worker is operating on. pub fn snapshot_repo(&self) -> Arc { - self.snapshot_repository.clone() + self.snapshot_repo.clone() } /// Request a snapshot to be taken. 
@@ -141,6 +138,40 @@ impl SnapshotWorker { } } +impl SnapshotWorker { + pub fn new_with_repository( + snapshot_repository: Arc, + compression: Compression, + runtime: RuntimeDispatch, + ) -> Self { + let database = snapshot_repository.database_identity(); + let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); + let (snapshot_created, _) = watch::channel(latest_snapshot); + let (request_tx, request_rx) = mpsc::unbounded(); + + let actor = SnapshotWorkerActor { + snapshot_requests: request_rx, + snapshot_repo: snapshot_repository.clone(), + snapshot_created: snapshot_created.clone(), + metrics: SnapshotMetrics::new(database), + runtime: runtime.clone(), + compression: compression.is_enabled().then(|| Compressor { + snapshot_repo: snapshot_repository.clone(), + metrics: CompressionMetrics::new(database), + stats: <_>::default(), + runtime: runtime.clone(), + }), + }; + runtime.spawn(actor.run()); + + Self { + snapshot_created, + request_snapshot: request_tx, + snapshot_repo: snapshot_repository, + } + } +} + struct SnapshotMetrics { snapshot_timing_total: Histogram, snapshot_timing_inner: Histogram, @@ -169,6 +200,7 @@ struct SnapshotWorkerActor { snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, + runtime: RuntimeDispatch, compression: Option, } @@ -220,21 +252,24 @@ impl SnapshotWorkerActor { let inner_timer = self.metrics.snapshot_timing_inner.clone(); let snapshot_repo = self.snapshot_repo.clone(); + let runtime = self.runtime.clone(); let database_identity = self.snapshot_repo.database_identity(); - let maybe_snapshot = asyncify(move || { - let _timer = inner_timer.start_timer(); - Locking::take_snapshot_internal(&state, snapshot_repo.as_ref()) - }) - .await - .with_context(|| format!("error capturing snapshot of database {}", database_identity))?; - let (snapshot_offset, unflushed_snapshot) = maybe_snapshot.with_context(|| { - format!( - "refusing to take snapshot of database {} at TX offset -1", - 
database_identity - ) - })?; + let maybe_snapshot = runtime + .spawn_blocking(move || { + let _timer = inner_timer.start_timer(); + Locking::take_snapshot_internal(&state, snapshot_repo.as_ref()) + }) + .await + .with_context(|| format!("error capturing snapshot of database {}", database_identity))? + .with_context(|| { + format!( + "refusing to take snapshot of database {} at TX offset -1", + database_identity + ) + })?; + let (snapshot_offset, unflushed_snapshot) = maybe_snapshot; self.metrics .snapshot_timing_fsync .observe_closure_duration(|| unflushed_snapshot.sync_all())?; @@ -307,9 +342,10 @@ impl CompressionMetrics { } struct Compressor { - snapshot_repo: Arc, + snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, + runtime: RuntimeDispatch, } impl Compressor { @@ -341,7 +377,8 @@ impl Compressor { let range = start..latest_snapshot; let mut stats = self.stats.take().unwrap_or_default(); - let (mut stats, res) = asyncify({ + let runtime = self.runtime.clone(); + let (mut stats, res) = runtime.spawn_blocking({ let range = range.clone(); move || { let _timer = inner_timer.start_timer(); diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 26b35230b1f..4a7246bcbd7 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -18,6 +18,7 @@ pub mod estimation; pub mod host; pub mod module_host_context; pub mod replica_context; +pub mod runtime; pub mod startup; pub mod subscription; pub mod util; diff --git a/crates/core/src/runtime.rs b/crates/core/src/runtime.rs new file mode 100644 index 00000000000..53baad4d73a --- /dev/null +++ b/crates/core/src/runtime.rs @@ -0,0 +1,3 @@ +//! Runtime boundary re-exported for core call sites. 
+ +pub use spacetimedb_runtime::{current_handle_or_new_runtime, Handle, Runtime, RuntimeDispatch, RuntimeTimeout}; diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index 83760252a5e..f82d36286d4 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2061,7 +2061,7 @@ mod tests { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), snapshots: None, - runtime: rt, + runtime: crate::runtime::RuntimeDispatch::tokio(rt), }), None, 0, diff --git a/crates/durability/Cargo.toml b/crates/durability/Cargo.toml index 0ea8022fcbe..4eaa3870001 100644 --- a/crates/durability/Cargo.toml +++ b/crates/durability/Cargo.toml @@ -21,6 +21,7 @@ scopeguard.workspace = true spacetimedb-commitlog.workspace = true spacetimedb-fs-utils.workspace = true spacetimedb-paths.workspace = true +spacetimedb-runtime = { workspace = true, features = ["tokio"] } spacetimedb-sats.workspace = true thiserror.workspace = true tokio.workspace = true diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 5cc03099ab6..51d89e2e848 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -10,7 +10,6 @@ use std::{ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, trace, warn}; -use scopeguard::ScopeGuard; use spacetimedb_commitlog::{ error, payload::Txdata, @@ -19,11 +18,9 @@ use spacetimedb_commitlog::{ }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; +use spacetimedb_runtime::RuntimeDispatch; use thiserror::Error; -use tokio::{ - sync::watch, - task::{spawn_blocking, JoinHandle}, -}; +use tokio::sync::{oneshot, watch}; use tracing::{instrument, Span}; use crate::{Close, Durability, DurableOffset, History, PreparedTx, TxOffset}; @@ -109,9 +106,9 @@ where /// This 
is mainly for observability purposes, and can thus be updated with /// relaxed memory ordering. queue_depth: Arc, - /// [JoinHandle] for the actor task. Contains `None` if already cancelled - /// (via [Durability::close]). - actor: Mutex>>, + /// Completion notification for the background actor. Contains `None` once + /// consumed by [`Durability::close`]. + actor_done: Mutex>>, } impl Local { @@ -119,13 +116,13 @@ impl Local { /// /// `replica_dir` must already exist. /// - /// Background tasks are spawned onto the provided tokio runtime. + /// Background tasks are spawned onto the provided runtime. /// /// We will send a message down the `on_new_segment` channel whenever we begin a new commitlog segment. /// This is used to capture a snapshot each new segment. pub fn open( replica_dir: ReplicaDir, - rt: tokio::runtime::Handle, + runtime: RuntimeDispatch, opts: Options, on_new_segment: Option>, ) -> Result { @@ -140,7 +137,7 @@ impl Local { opts.commitlog, on_new_segment, )?); - Self::open_inner(clog, rt, opts, Some(lock)) + Self::open_inner(clog, runtime, opts, Some(lock)) } } @@ -151,7 +148,7 @@ where { fn open_inner( clog: Arc, R>>, - rt: tokio::runtime::Handle, + runtime: RuntimeDispatch, opts: Options, lock: Option, ) -> Result { @@ -159,19 +156,17 @@ where let (queue, txdata_rx) = async_channel::bounded(queue_capacity); let queue_depth = Arc::new(AtomicU64::new(0)); let (durable_tx, durable_rx) = watch::channel(clog.max_committed_offset()); - - let actor = rt.spawn( + let (actor_done_tx, actor_done_rx) = oneshot::channel(); + runtime.spawn( Actor { clog: clog.clone(), - durable_offset: durable_tx, queue_depth: queue_depth.clone(), - batch_capacity: opts.batch_capacity, - - lock, + runtime: runtime.clone(), + _lock: lock, } - .run(txdata_rx), + .run(txdata_rx, actor_done_tx), ); Ok(Self { @@ -179,7 +174,7 @@ where durable_offset: durable_rx, queue, queue_depth, - actor: Mutex::new(Some(actor)), + actor_done: Mutex::new(Some(actor_done_rx)), }) } @@ -195,10 
+190,10 @@ where R: RepoWithoutLockFile + Send + Sync + 'static, { /// Create a [`Local`] instance backed by the provided commitlog repo. - pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { + pub fn open_with_repo(repo: R, runtime: RuntimeDispatch, opts: Options) -> Result { info!("open local durability"); let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); - Self::open_inner(clog, rt, opts, None) + Self::open_inner(clog, runtime, opts, None) } } @@ -246,9 +241,9 @@ where queue_depth: Arc, batch_capacity: NonZeroUsize, + runtime: RuntimeDispatch, - #[allow(unused)] - lock: Option, + _lock: Option, } impl Actor @@ -257,7 +252,7 @@ where R: Repo + Send + Sync + 'static, { #[instrument(name = "durability::local::actor", skip_all)] - async fn run(self, transactions_rx: async_channel::Receiver>>) { + async fn run(self, transactions_rx: async_channel::Receiver>>, done: oneshot::Sender<()>) { info!("starting durability actor"); let mut tx_buf = Vec::with_capacity(self.batch_capacity.get()); @@ -281,14 +276,14 @@ where let clog = self.clog.clone(); let ready_len = tx_buf.len(); self.queue_depth.fetch_sub(ready_len as u64, Relaxed); - tx_buf = spawn_blocking(move || -> io::Result>>> { + let runtime = self.runtime.clone(); + tx_buf = runtime.spawn_blocking(move || -> io::Result>>> { for tx in tx_buf.drain(..) 
{ clog.commit([tx.into_transaction()])?; } Ok(tx_buf) }) .await - .expect("commitlog write panicked") .expect("commitlog write failed"); if self.flush_and_sync().await.is_err() { sync_on_exit = false; @@ -305,6 +300,7 @@ where } info!("exiting durability actor"); + let _ = done.send(()); } #[instrument(skip_all)] @@ -318,12 +314,13 @@ where let clog = self.clog.clone(); let span = Span::current(); - spawn_blocking(move || { + let runtime = self.runtime.clone(); + runtime + .spawn_blocking(move || { let _span = span.enter(); clog.flush_and_sync() }) .await - .expect("commitlog flush-and-sync blocking task panicked") .inspect_err(|e| warn!("error flushing commitlog: {e:#}")) .inspect(|maybe_offset| { if let Some(new_offset) = maybe_offset { @@ -356,29 +353,14 @@ where info!("close local durability"); let durable_offset = self.durable_tx_offset(); - let maybe_actor = self.actor.lock().unwrap().take(); - // Abort actor if shutdown future is dropped. - let abort = scopeguard::guard( - maybe_actor.as_ref().map(|join_handle| join_handle.abort_handle()), - |maybe_abort_handle| { - if let Some(abort_handle) = maybe_abort_handle { - warn!("close future dropped, aborting durability actor"); - abort_handle.abort(); - } - }, - ); + let maybe_actor_done = self.actor_done.lock().unwrap().take(); self.queue.close(); async move { - if let Some(actor) = maybe_actor - && let Err(e) = actor.await + if let Some(actor_done) = maybe_actor_done + && actor_done.await.is_err() { - // Will print "durability actor: task was cancelled" - // or "durability actor: task panicked [...]" - warn!("durability actor: {e}"); + warn!("durability actor completion signal dropped"); } - // Don't abort if the actor completed. 
- let _ = ScopeGuard::into_inner(abort); - durable_offset.last_seen() } .boxed() diff --git a/crates/durability/src/imp/mod.rs b/crates/durability/src/imp/mod.rs index 3e00ae21ee1..77f0998e6f8 100644 --- a/crates/durability/src/imp/mod.rs +++ b/crates/durability/src/imp/mod.rs @@ -56,4 +56,9 @@ mod testing { future::ready(*self.durable_offset.borrow()).boxed() } } + + #[cfg(test)] + mod tests { + use super::*; + } } diff --git a/crates/durability/tests/io/fallocate.rs b/crates/durability/tests/io/fallocate.rs index 64e50faf4cc..be5ee61bc0b 100644 --- a/crates/durability/tests/io/fallocate.rs +++ b/crates/durability/tests/io/fallocate.rs @@ -161,7 +161,7 @@ async fn local_durability( ) -> Result, spacetimedb_durability::local::OpenError> { spacetimedb_durability::Local::open( dir, - tokio::runtime::Handle::current(), + spacetimedb_runtime::RuntimeDispatch::tokio_current(), spacetimedb_durability::local::Options { commitlog: spacetimedb_commitlog::Options { max_segment_size, diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml new file mode 100644 index 00000000000..6f62e0e6b08 --- /dev/null +++ b/crates/runtime/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "spacetimedb-runtime" +version.workspace = true +edition.workspace = true +license-file = "LICENSE" +description = "Runtime and deterministic simulation utilities for SpacetimeDB" +rust-version.workspace = true + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +futures.workspace = true +futures-util.workspace = true +tokio = { workspace = true, optional = true } +async-task = { version = "4.4", optional = true } +libc = { version = "0.2", optional = true } +tracing = { workspace = true, optional = true } + +[features] +default = ["tokio"] +tokio = ["dep:tokio"] +simulation = ["dep:async-task", "dep:libc", "dep:tracing"] diff --git a/crates/runtime/LICENSE b/crates/runtime/LICENSE new file mode 100644 index 00000000000..daef5135277 --- /dev/null +++ b/crates/runtime/LICENSE @@ 
-0,0 +1,731 @@ +SPACETIMEDB BUSINESS SOURCE LICENSE AGREEMENT + +Business Source License 1.1 + +Parameters + +Licensor: Clockwork Laboratories, Inc. +Licensed Work: SpacetimeDB 2.2.0 + The Licensed Work is + (c) 2023 Clockwork Laboratories, Inc. + +Additional Use Grant: You may make use of the Licensed Work provided your + application or service uses the Licensed Work with no + more than one SpacetimeDB instance in production and + provided that you do not use the Licensed Work for a + Database Service. + + A “Database Service” is a commercial offering that + allows third parties (other than your employees and + contractors) to access the functionality of the + Licensed Work by creating tables whose schemas are + controlled by such third parties. + +Change Date: 2031-04-29 + +Change License: GNU Affero General Public License v3.0 with a linking + exception + +For information about alternative licensing arrangements for the Software, +please visit: https://spacetimedb.com + +Notice + +The Business Source License (this document, or the “License”) is not an Open +Source license. However, the Licensed Work will eventually be made available +under an Open Source License, as stated in this License. + +License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved. +“Business Source License” is a trademark of MariaDB Corporation Ab. + +----------------------------------------------------------------------------- + +Business Source License 1.1 + +Terms + +The Licensor hereby grants you the right to copy, modify, create derivative +works, redistribute, and make non-production use of the Licensed Work. The +Licensor may make an Additional Use Grant, above, permitting limited +production use. 
+ +Effective on the Change Date, or the fourth anniversary of the first publicly +available distribution of a specific version of the Licensed Work under this +License, whichever comes first, the Licensor hereby grants you rights under +the terms of the Change License, and the rights granted in the paragraph +above terminate. + +If your use of the Licensed Work does not comply with the requirements +currently in effect as described in this License, you must purchase a +commercial license from the Licensor, its affiliated entities, or authorized +resellers, or you must refrain from using the Licensed Work. + +All copies of the original and modified Licensed Work, and derivative works +of the Licensed Work, are subject to this License. This License applies +separately for each version of the Licensed Work and the Change Date may vary +for each version of the Licensed Work released by Licensor. + +You must conspicuously display this License on each original or modified copy +of the Licensed Work. If you receive the Licensed Work in original or +modified form from a third party, the terms and conditions set forth in this +License apply to your use of that work. + +Any use of the Licensed Work in violation of this License will automatically +terminate your rights under this License for the current and all other +versions of the Licensed Work. + +This License does not grant you any right in any trademark or logo of +Licensor or its affiliates (provided that you may use a trademark or logo of +Licensor as expressly required by this License). + +TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +TITLE. 
+ +MariaDB hereby grants you permission to use this License’s text to license +your works, and to refer to it using the trademark “Business Source License”, +as long as you comply with the Covenants of Licensor below. + +Covenants of Licensor + +In consideration of the right to use this License’s text and the “Business +Source License” name and trademark, Licensor covenants to MariaDB, and to all +other recipients of the licensed work to be provided by Licensor: + +1. To specify as the Change License the GPL Version 2.0 or any later version, + or a license that is compatible with GPL Version 2.0 or a later version, + where “compatible” means that software provided under the Change License can + be included in a program with software provided under GPL Version 2.0 or a + later version. Licensor may specify additional Change Licenses without + limitation. + +2. To either: (a) specify an additional grant of rights to use that does not + impose any additional restriction on the right granted in this License, as + the Additional Use Grant; or (b) insert the text “None”. + +3. To specify a Change Date. + +4. Not to modify this License in any other way. + +----------------------------------------------------------------------------- + +Copyright (C) 2023 Clockwork Laboratories, Inc. + +This program is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License, version 3, as published +by the Free Software Foundation. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU Affero General Public License +along with this program; if not, see <https://www.gnu.org/licenses/>.
+ +Additional permission under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or combining it +with SpacetimeDB (or a modified version of that library), containing parts +covered by the terms of the AGPL v3.0, the licensors of this Program grant +you additional permission to convey the resulting work. + +Additional permission under GNU AGPL version 3 section 13 + +If you modify this Program, or any covered work, by linking or combining it +with SpacetimeDB (or a modified version of that library), containing parts +covered by the terms of the AGPL v3.0, the licensors of this Program grant +you additional permission that, notwithstanding any other provision of this +License, you need not prominently offer all users interacting with your +modified version remotely through a computer network an opportunity to +receive the Corresponding Source of your version from a network server at no +charge, if your version supports such interaction. This permission does not +waive or modify any other obligations or terms of the AGPL v3.0, except for +the specific requirement set forth in section 13. + +A copy of the AGPL v3.0 license is reproduced below. + + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + +Copyright © 2007 Free Software Foundation, Inc. +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble +The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + +The licenses for most software and other practical works are designed to take +away your freedom to share and change the works. By contrast, our General +Public Licenses are intended to guarantee your freedom to share and change +all versions of a program--to make sure it remains free software for all its +users. 
+ +When we speak of free software, we are referring to freedom, not price. Our +General Public Licenses are designed to make sure that you have the freedom +to distribute copies of free software (and charge for them if you wish), that +you receive source code or can get it if you want it, that you can change the +software or use pieces of it in new free programs, and that you know you can +do these things. + +Developers that use our General Public Licenses protect your rights with two +steps: (1) assert copyright on the software, and (2) offer you this License +which gives you legal permission to copy, distribute and/or modify the +software. + +A secondary benefit of defending all users' freedom is that improvements made +in alternate versions of the program, if they receive widespread use, become +available for other developers to incorporate. Many developers of free +software are heartened and encouraged by the resulting cooperation. However, +in the case of software used on network servers, this result may fail to come +about. The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its source +code to the public. + +The GNU Affero General Public License is designed specifically to ensure +that, in such cases, the modified source code becomes available to the +community. It requires the operator of a network server to provide the source +code of the modified version running there to the users of that server. +Therefore, public use of a modified version, on a publicly accessible server, +gives the public access to the source code of the modified version. + +An older license, called the Affero General Public License and published by +Affero, was designed to accomplish similar goals. This is a different +license, not a version of the Affero GPL, but Affero has released a new +version of the Affero GPL which permits relicensing under this license. 
+ +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS +0. Definitions. +"This License" refers to version 3 of the GNU Affero General Public License. + +"Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + +"The Program" refers to any copyrightable work licensed under this License. +Each licensee is addressed as "you". "Licensees" and "recipients" may be +individuals or organizations. + +To "modify" a work means to copy from or adapt all or part of the work in a +fashion requiring copyright permission, other than the making of an exact +copy. The resulting work is called a "modified version" of the earlier work +or a work "based on" the earlier work. + +A "covered work" means either the unmodified Program or a work based on the +Program. + +To "propagate" a work means to do anything with it that, without permission, +would make you directly or secondarily liable for infringement under +applicable copyright law, except executing it on a computer or modifying a +private copy. Propagation includes copying, distribution (with or without +modification), making available to the public, and in some countries other +activities as well. + +To "convey" a work means any kind of propagation that enables other parties +to make or receive copies. Mere interaction with a user through a computer +network, with no transfer of a copy, is not conveying. + +An interactive user interface displays "Appropriate Legal Notices" to the +extent that it includes a convenient and prominently visible feature that (1) +displays an appropriate copyright notice, and (2) tells the user that there +is no warranty for the work (except to the extent that warranties are +provided), that licensees may convey the work under this License, and how to +view a copy of this License. 
If the interface presents a list of user +commands or options, such as a menu, a prominent item in the list meets this +criterion. + +1. Source Code. +The "source code" for a work means the preferred form of the work for making +modifications to it. "Object code" means any non-source form of a work. + +A "Standard Interface" means an interface that either is an official standard +defined by a recognized standards body, or, in the case of interfaces +specified for a particular programming language, one that is widely used +among developers working in that language. + +The "System Libraries" of an executable work include anything, other than the +work as a whole, that (a) is included in the normal form of packaging a Major +Component, but which is not part of that Major Component, and (b) serves only +to enable use of the work with that Major Component, or to implement a +Standard Interface for which an implementation is available to the public in +source code form. A "Major Component", in this context, means a major +essential component (kernel, window system, and so on) of the specific +operating system (if any) on which the executable work runs, or a compiler +used to produce the work, or an object code interpreter used to run it. + +The "Corresponding Source" for a work in object code form means all the +source code needed to generate, install, and (for an executable work) run the +object code and to modify the work, including scripts to control those +activities. However, it does not include the work's System Libraries, or +general-purpose tools or generally available free programs which are used +unmodified in performing those activities but which are not part of the work. 
+For example, Corresponding Source includes interface definition files +associated with source files for the work, and the source code for shared +libraries and dynamically linked subprograms that the work is specifically +designed to require, such as by intimate data communication or control flow +between those subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can regenerate +automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same work. + +2. Basic Permissions. +All rights granted under this License are granted for the term of copyright +on the Program, and are irrevocable provided the stated conditions are met. +This License explicitly affirms your unlimited permission to run the +unmodified Program. The output from running a covered work is covered by this +License only if the output, given its content, constitutes a covered work. +This License acknowledges your rights of fair use or other equivalent, as +provided by copyright law. + +You may make, run and propagate covered works that you do not convey, without +conditions so long as your license otherwise remains in force. You may convey +covered works to others for the sole purpose of having them make +modifications exclusively for you, or provide you with facilities for running +those works, provided that you comply with the terms of this License in +conveying all material for which you do not control copyright. Those thus +making or running the covered works for you must do so exclusively on your +behalf, under your direction and control, on terms that prohibit them from +making any copies of your copyrighted material outside their relationship +with you. + +Conveying under any other circumstances is permitted solely under the +conditions stated below. Sublicensing is not allowed; section 10 makes it +unnecessary. + +3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
+No covered work shall be deemed part of an effective technological measure +under any applicable law fulfilling obligations under article 11 of the WIPO +copyright treaty adopted on 20 December 1996, or similar laws prohibiting or +restricting circumvention of such measures. + +When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention is +effected by exercising rights under this License with respect to the covered +work, and you disclaim any intention to limit operation or modification of +the work as a means of enforcing, against the work's users, your or third +parties' legal rights to forbid circumvention of technological measures. + +4. Conveying Verbatim Copies. +You may convey verbatim copies of the Program's source code as you receive +it, in any medium, provided that you conspicuously and appropriately publish +on each copy an appropriate copyright notice; keep intact all notices stating +that this License and any non-permissive terms added in accord with section 7 +apply to the code; keep intact all notices of the absence of any warranty; +and give all recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, and you +may offer support or warranty protection for a fee. + +5. Conveying Modified Source Versions. +You may convey a work based on the Program, or the modifications to produce +it from the Program, in the form of source code under the terms of section 4, +provided that you also meet all of these conditions: + +a) The work must carry prominent notices stating that you modified it, and +giving a relevant date. +b) The work must carry prominent notices stating that it is released under +this License and any conditions added under section 7. This requirement +modifies the requirement in section 4 to "keep intact all notices". 
+c) You must license the entire work, as a whole, under this License to anyone +who comes into possession of a copy. This License will therefore apply, along +with any applicable section 7 additional terms, to the whole of the work, and +all its parts, regardless of how they are packaged. This License gives no +permission to license the work in any other way, but it does not invalidate +such permission if you have separately received it. +d) If the work has interactive user interfaces, each must display Appropriate +Legal Notices; however, if the Program has interactive interfaces that do not +display Appropriate Legal Notices, your work need not make them do so. +A compilation of a covered work with other separate and independent works, +which are not by their nature extensions of the covered work, and which are +not combined with it such as to form a larger program, in or on a volume of a +storage or distribution medium, is called an "aggregate" if the compilation +and its resulting copyright are not used to limit the access or legal rights +of the compilation's users beyond what the individual works permit. Inclusion +of a covered work in an aggregate does not cause this License to apply to the +other parts of the aggregate. + +6. Conveying Non-Source Forms. +You may convey a covered work in object code form under the terms of sections +4 and 5, provided that you also convey the machine-readable Corresponding +Source under the terms of this License, in one of these ways: + +a) Convey the object code in, or embodied in, a physical product (including a +physical distribution medium), accompanied by the Corresponding Source fixed +on a durable physical medium customarily used for software interchange. 
+b) Convey the object code in, or embodied in, a physical product (including a +physical distribution medium), accompanied by a written offer, valid for at +least three years and valid for as long as you offer spare parts or customer +support for that product model, to give anyone who possesses the object code +either (1) a copy of the Corresponding Source for all the software in the +product that is covered by this License, on a durable physical medium +customarily used for software interchange, for a price no more than your +reasonable cost of physically performing this conveying of source, or (2) +access to copy the Corresponding Source from a network server at no charge. +c) Convey individual copies of the object code with a copy of the written +offer to provide the Corresponding Source. This alternative is allowed only +occasionally and noncommercially, and only if you received the object code +with such an offer, in accord with subsection 6b. +d) Convey the object code by offering access from a designated place (gratis +or for a charge), and offer equivalent access to the Corresponding Source in +the same way through the same place at no further charge. You need not +require recipients to copy the Corresponding Source along with the object +code. If the place to copy the object code is a network server, the +Corresponding Source may be on a different server (operated by you or a third +party) that supports equivalent copying facilities, provided you maintain +clear directions next to the object code saying where to find the +Corresponding Source. Regardless of what server hosts the Corresponding +Source, you remain obligated to ensure that it is available for as long as +needed to satisfy these requirements. +e) Convey the object code using peer-to-peer transmission, provided you +inform other peers where the object code and Corresponding Source of the work +are being offered to the general public at no charge under subsection 6d. 
+A separable portion of the object code, whose source code is excluded from +the Corresponding Source as a System Library, need not be included in +conveying the object code work. + +A "User Product" is either (1) a "consumer product", which means any tangible +personal property which is normally used for personal, family, or household +purposes, or (2) anything designed or sold for incorporation into a dwelling. +In determining whether a product is a consumer product, doubtful cases shall +be resolved in favor of coverage. For a particular product received by a +particular user, "normally used" refers to a typical or common use of that +class of product, regardless of the status of the particular user or of the +way in which the particular user actually uses, or expects or is expected to +use, the product. A product is a consumer product regardless of whether the +product has substantial commercial, industrial or non-consumer uses, unless +such uses represent the only significant mode of use of the product. + +"Installation Information" for a User Product means any methods, procedures, +authorization keys, or other information required to install and execute +modified versions of a covered work in that User Product from a modified +version of its Corresponding Source. The information must suffice to ensure +that the continued functioning of the modified object code is in no case +prevented or interfered with solely because modification has been made. + +If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as part of +a transaction in which the right of possession and use of the User Product is +transferred to the recipient in perpetuity or for a fixed term (regardless of +how the transaction is characterized), the Corresponding Source conveyed +under this section must be accompanied by the Installation Information. 
But +this requirement does not apply if neither you nor any third party retains +the ability to install modified object code on the User Product (for example, +the work has been installed in ROM). + +The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates for +a work that has been modified or installed by the recipient, or for the User +Product in which it has been modified or installed. Access to a network may +be denied when the modification itself materially and adversely affects the +operation of the network or violates the rules and protocols for +communication across the network. + +Corresponding Source conveyed, and Installation Information provided, in +accord with this section must be in a format that is publicly documented (and +with an implementation available to the public in source code form), and must +require no special password or key for unpacking, reading or copying. + +7. Additional Terms. +"Additional permissions" are terms that supplement the terms of this License +by making exceptions from one or more of its conditions. Additional +permissions that are applicable to the entire Program shall be treated as +though they were included in this License, to the extent that they are valid +under applicable law. If additional permissions apply only to part of the +Program, that part may be used separately under those permissions, but the +entire Program remains governed by this License without regard to the +additional permissions. + +When you convey a copy of a covered work, you may at your option remove any +additional permissions from that copy, or from any part of it. (Additional +permissions may be written to require their own removal in certain cases when +you modify the work.) You may place additional permissions on material, added +by you to a covered work, for which you have or can give appropriate +copyright permission. 
+ +Notwithstanding any other provision of this License, for material you add to +a covered work, you may (if authorized by the copyright holders of that +material) supplement the terms of this License with terms: + +a) Disclaiming warranty or limiting liability differently from the terms of +sections 15 and 16 of this License; or +b) Requiring preservation of specified reasonable legal notices or author +attributions in that material or in the Appropriate Legal Notices displayed +by works containing it; or +c) Prohibiting misrepresentation of the origin of that material, or requiring +that modified versions of such material be marked in reasonable ways as +different from the original version; or +d) Limiting the use for publicity purposes of names of licensors or authors +of the material; or +e) Declining to grant rights under trademark law for use of some trade names, +trademarks, or service marks; or +f) Requiring indemnification of licensors and authors of that material by +anyone who conveys the material (or modified versions of it) with contractual +assumptions of liability to the recipient, for any liability that these +contractual assumptions directly impose on those licensors and authors. +All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is governed +by this License along with a term that is a further restriction, you may +remove that term. If a license document contains a further restriction but +permits relicensing or conveying under this License, you may add to a covered +work material governed by the terms of that license document, provided that +the further restriction does not survive such relicensing or conveying. 
+ +If you add terms to a covered work in accord with this section, you must +place, in the relevant source files, a statement of the additional terms that +apply to those files, or a notice indicating where to find the applicable +terms. + +Additional terms, permissive or non-permissive, may be stated in the form of +a separately written license, or stated as exceptions; the above requirements +apply either way. + +8. Termination. +You may not propagate or modify a covered work except as expressly provided +under this License. Any attempt otherwise to propagate or modify it is void, +and will automatically terminate your rights under this License (including +any patent licenses granted under the third paragraph of section 11). + +However, if you cease all violation of this License, then your license from a +particular copyright holder is reinstated (a) provisionally, unless and until +the copyright holder explicitly and finally terminates your license, and (b) +permanently, if the copyright holder fails to notify you of the violation by +some reasonable means prior to 60 days after the cessation. + +Moreover, your license from a particular copyright holder is reinstated +permanently if the copyright holder notifies you of the violation by some +reasonable means, this is the first time you have received notice of +violation of this License (for any work) from that copyright holder, and you +cure the violation prior to 30 days after your receipt of the notice. + +Termination of your rights under this section does not terminate the licenses +of parties who have received copies or rights from you under this License. If +your rights have been terminated and not permanently reinstated, you do not +qualify to receive new licenses for the same material under section 10. + +9. Acceptance Not Required for Having Copies. +You are not required to accept this License in order to receive or run a copy +of the Program. 
Ancillary propagation of a covered work occurring solely as a +consequence of using peer-to-peer transmission to receive a copy likewise +does not require acceptance. However, nothing other than this License grants +you permission to propagate or modify any covered work. These actions +infringe copyright if you do not accept this License. Therefore, by modifying +or propagating a covered work, you indicate your acceptance of this License +to do so. + +10. Automatic Licensing of Downstream Recipients. +Each time you convey a covered work, the recipient automatically receives a +license from the original licensors, to run, modify and propagate that work, +subject to this License. You are not responsible for enforcing compliance by +third parties with this License. + +An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered work +results from an entity transaction, each party to that transaction who +receives a copy of the work also receives whatever licenses to the work the +party's predecessor in interest had or could give under the previous +paragraph, plus a right to possession of the Corresponding Source of the work +from the predecessor in interest, if the predecessor has it or can get it +with reasonable efforts. + +You may not impose any further restrictions on the exercise of the rights +granted or affirmed under this License. For example, you may not impose a +license fee, royalty, or other charge for exercise of rights granted under +this License, and you may not initiate litigation (including a cross-claim or +counterclaim in a lawsuit) alleging that any patent claim is infringed by +making, using, selling, offering for sale, or importing the Program or any +portion of it. + +11. Patents. 
+A "contributor" is a copyright holder who authorizes use under this License +of the Program or a work on which the Program is based. The work thus +licensed is called the contributor's "contributor version". + +A contributor's "essential patent claims" are all patent claims owned or +controlled by the contributor, whether already acquired or hereafter +acquired, that would be infringed by some manner, permitted by this License, +of making, using, or selling its contributor version, but do not include +claims that would be infringed only as a consequence of further modification +of the contributor version. For purposes of this definition, "control" +includes the right to grant patent sublicenses in a manner consistent with +the requirements of this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free patent +license under the contributor's essential patent claims, to make, use, sell, +offer for sale, import and otherwise run, modify and propagate the contents +of its contributor version. + +In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent (such +as an express permission to practice a patent or covenant not to sue for +patent infringement). To "grant" such a patent license to a party means to +make such an agreement or commitment not to enforce a patent against the +party. 
+ +If you convey a covered work, knowingly relying on a patent license, and the +Corresponding Source of the work is not available for anyone to copy, free of +charge and under the terms of this License, through a publicly available +network server or other readily accessible means, then you must either (1) +cause the Corresponding Source to be so available, or (2) arrange to deprive +yourself of the benefit of the patent license for this particular work, or +(3) arrange, in a manner consistent with the requirements of this License, to +extend the patent license to downstream recipients. "Knowingly relying" means +you have actual knowledge that, but for the patent license, your conveying +the covered work in a country, or your recipient's use of the covered work in +a country, would infringe one or more identifiable patents in that country +that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or arrangement, +you convey, or propagate by procuring conveyance of, a covered work, and +grant a patent license to some of the parties receiving the covered work +authorizing them to use, propagate, modify or convey a specific copy of the +covered work, then the patent license you grant is automatically extended to +all recipients of the covered work and works based on it. + +A patent license is "discriminatory" if it does not include within the scope +of its coverage, prohibits the exercise of, or is conditioned on the +non-exercise of one or more of the rights that are specifically granted under +this License. 
You may not convey a covered work if you are a party to an +arrangement with a third party that is in the business of distributing +software, under which you make payment to the third party based on the extent +of your activity of conveying the work, and under which the third party +grants, to any of the parties who would receive the covered work from you, a +discriminatory patent license (a) in connection with copies of the covered +work conveyed by you (or copies made from those copies), or (b) primarily for +and in connection with specific products or compilations that contain the +covered work, unless you entered into that arrangement, or that patent +license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting any +implied license or other defenses to infringement that may otherwise be +available to you under applicable patent law. + +12. No Surrender of Others' Freedom. +If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not excuse +you from the conditions of this License. If you cannot convey a covered work +so as to satisfy simultaneously your obligations under this License and any +other pertinent obligations, then as a consequence you may not convey it at +all. For example, if you agree to terms that obligate you to collect a +royalty for further conveying from those to whom you convey the Program, the +only way you could satisfy both those terms and this License would be to +refrain entirely from conveying the Program. + +13. Remote Network Interaction; Use with the GNU General Public License. 
+Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users interacting +with it remotely through a computer network (if your version supports such +interaction) an opportunity to receive the Corresponding Source of your +version by providing access to the Corresponding Source from a network server +at no charge, through some standard or customary means of facilitating +copying of software. This Corresponding Source shall include the +Corresponding Source for any work covered by version 3 of the GNU General +Public License that is incorporated pursuant to the following paragraph. + +Notwithstanding any other provision of this License, you have permission to +link or combine any covered work with a work licensed under version 3 of the +GNU General Public License into a single combined work, and to convey the +resulting work. The terms of this License will continue to apply to the part +which is the covered work, but the work with which it is combined will remain +governed by version 3 of the GNU General Public License. + +14. Revised Versions of this License. +The Free Software Foundation may publish revised and/or new versions of the +GNU Affero General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies that a certain numbered version of the GNU Affero General Public +License "or any later version" applies to it, you have the option of +following the terms and conditions either of that numbered version or of any +later version published by the Free Software Foundation. If the Program does +not specify a version number of the GNU Affero General Public License, you +may choose any version ever published by the Free Software Foundation. 
+ +If the Program specifies that a proxy can decide which future versions of the +GNU Affero General Public License can be used, that proxy's public statement +of acceptance of a version permanently authorizes you to choose that version +for the Program. + +Later license versions may give you additional or different permissions. +However, no additional obligations are imposed on any author or copyright +holder as a result of your choosing to follow a later version. + +15. Disclaimer of Warranty. +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE +LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, +EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE +ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. +SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY +SERVICING, REPAIR OR CORRECTION. + +16. Limitation of Liability. +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL +ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE +PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE +OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR +DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR +A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH +HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +17. Interpretation of Sections 15 and 16. 
+If the disclaimer of warranty and limitation of liability provided above +cannot be given local legal effect according to their terms, reviewing courts +shall apply local law that most closely approximates an absolute waiver of +all civil liability in connection with the Program, unless a warranty or +assumption of liability accompanies a copy of the Program in return for a +fee. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs +If you develop a new program, and you want it to be of the greatest possible +use to the public, the best way to achieve this is to make it free software +which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach +them to the start of each source file to most effectively state the exclusion +of warranty; and each file should have at least the "copyright" line and a +pointer to where the full notice is found. + +SpacetimeDB: A database which replaces your server. +Copyright (C) 2023 Clockwork Laboratories, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. +Also add information on how to contact you by electronic and paper mail. + +If your software can interact with users remotely through a computer network, +you should also make sure that it provides a way for users to get its source.
+For example, if your program is a web application, its interface could +display a "Source" link that leads users to an archive of the code. There are +many ways you could offer source, and different solutions will be better for +different programs; see section 13 for the specific requirements. + +You should also get your employer (if you work as a programmer) or school, if +any, to sign a "copyright disclaimer" for the program, if necessary. For more +information on this, and how to apply and follow the GNU AGPL, see +<https://www.gnu.org/licenses/>. diff --git a/crates/runtime/README.md b/crates/runtime/README.md new file mode 100644 index 00000000000..f26134ba7bd --- /dev/null +++ b/crates/runtime/README.md @@ -0,0 +1,182 @@ +# spacetimedb-runtime + +`spacetimedb-runtime` is the small runtime abstraction layer shared by core +code and DST. It exists for one reason: code such as durability and +snapshotting needs to spawn work, run blocking sections, and wait with +timeouts, but we want that same code to run on either: + +- real Tokio in production, or +- the deterministic DST simulator in tests. + +The crate keeps that boundary narrow. Most callers should depend on +`RuntimeDispatch` instead of reaching directly for Tokio or simulator internals. + +## Top-level API + +The top-level module in [src/lib.rs](./src/lib.rs) exposes: + +- `RuntimeDispatch` + A small tagged runtime handle with two backends: + - `Tokio(tokio::runtime::Handle)` when the `tokio` feature is enabled + - `Simulation(sim::Handle)` when the `simulation` feature is enabled +- `spawn(...)` + Fire-and-forget task spawning. +- `spawn_blocking(...)` + Run blocking work on the runtime-appropriate backend. + On Tokio this uses `tokio::task::spawn_blocking`. + In simulation this is still scheduled through the simulator so ordering stays + deterministic. +- `timeout(...)` + Runtime-relative timeout handling. + On Tokio this uses `tokio::time::timeout`. + In simulation this uses virtual time from `sim::time`.
+- `current_handle_or_new_runtime()` + Tokio convenience for production code that may or may not already be inside a + Tokio runtime. + +The design goal is intentionally modest: this crate is not a general async +framework. It is a compatibility layer for the small set of runtime operations +SpacetimeDB core code actually needs. + +## Features + +The crate has two independent backends: + +- `tokio` + Enables production runtime support and is part of the default feature set. +- `simulation` + Enables the deterministic local simulation runtime used by DST. + +Code can compile with one or both features enabled. `RuntimeDispatch` exposes +only the backends that were actually compiled in. + +## Simulation Modules + +The simulation backend lives under [src/sim](./src/sim). + +### `sim::mod` + +[src/sim/mod.rs](./src/sim/mod.rs) is the façade for the deterministic runtime. +It re-exports the main executor types and keeps the public surface small: + +- `Runtime` + Owns the simulator executor. +- `Handle` + Cloneable access to that executor from spawned tasks. +- `NodeId` + Logical node identifier used to group and pause/resume work. +- `JoinHandle` + Awaitable handle for spawned simulated tasks. +- `yield_now` + Cooperative yield point inside the simulator. +- `time` + Virtual time utilities. +- `Rng` and `DecisionSource` + Deterministic randomness primitives. + +It also exposes small helpers such as `advance_time(...)` and +`decision_source(...)`. + +### `sim::executor` + +[src/sim/executor.rs](./src/sim/executor.rs) is the heart of the simulator. 
+ +It provides a single-threaded async executor adapted from madsim's task loop: + +- tasks are stored as `async_task` runnables +- ready work is chosen by a deterministic RNG instead of an OS/runtime scheduler +- node state can be paused and resumed +- a thread-local handle context makes the current simulation runtime accessible + from inside spawned work +- determinism can be checked by replaying the same future twice and comparing + the sequence of scheduler decisions + +Important behavior: + +- `Runtime::block_on(...)` drives the whole simulation +- `Handle::spawn_on(...)` schedules work onto a logical node +- absence of runnable work and absence of future timer wakeups is treated as a + hang, which is exactly what DST wants + +This module is the reason `RuntimeDispatch::Simulation` can behave like a real +runtime without giving up reproducibility. + +### `sim::time` + +[src/sim/time.rs](./src/sim/time.rs) implements virtual time. + +It provides: + +- `now()` + Current simulated time. +- `sleep(duration)` + A future that completes when simulated time reaches the deadline. +- `timeout(duration, future)` + Race a future against simulated time. +- `advance(duration)` + Move time forward explicitly. + +Internally it maintains: + +- a current `Duration` +- timer registrations keyed by deadline +- wakeups for due timers + +The executor uses this module to move time only when necessary, which keeps +tests deterministic and avoids tying correctness to wall-clock behavior. + +### `sim::rng` + +[src/sim/rng.rs](./src/sim/rng.rs) provides deterministic randomness. + +There are two layers: + +- `Rng` + Stateful deterministic RNG used by the executor and runtime internals. +- `DecisionSource` + Small lock-free source for probabilistic choices in test/workload code. 
+ +This module also does two extra jobs: + +- records and checks determinism checkpoints so repeated seeded runs can prove + they took the same execution path +- hooks libc randomness calls such as `getrandom` so code running inside the + simulator sees deterministic randomness instead of ambient system entropy + +That second point matters because reproducibility falls apart quickly if a +dependency reads randomness outside the simulator's control. + +### `sim::system_thread` + +[src/sim/system_thread.rs](./src/sim/system_thread.rs) prevents accidental OS +thread creation while running under simulation. + +On Unix it intercepts `pthread_attr_init` and fails fast if code tries to spawn +real system threads from inside the simulator. That protects determinism and +enforces the intended execution model: simulated tasks should run on the +simulator, not escape onto real threads. + +## How This Crate Is Intended To Be Used + +For core code: + +- accept or store `RuntimeDispatch` +- use `spawn`, `spawn_blocking`, and `timeout` +- avoid embedding raw Tokio assumptions into shared logic + +For production-only code: + +- use `RuntimeDispatch::tokio_current()` or `RuntimeDispatch::tokio(handle)` + +For DST: + +- create `sim::Runtime` +- run the test harness with `Runtime::block_on(...)` +- pass `RuntimeDispatch::simulation_current()` into the code under test + +## Current Scope + +This crate is intentionally narrow. It is not trying to replace Tokio, and it +is not a generic distributed simulator. It currently provides exactly the +runtime seams needed by SpacetimeDB components that must run both in production +and under deterministic simulation. diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs new file mode 100644 index 00000000000..7633ef08e40 --- /dev/null +++ b/crates/runtime/src/lib.rs @@ -0,0 +1,122 @@ +//! Runtime and deterministic simulation utilities shared by core and DST. 
+ +use std::{fmt, future::Future, time::Duration}; + +#[cfg(feature = "simulation")] +pub mod sim; + +#[cfg(feature = "tokio")] +pub type Handle = tokio::runtime::Handle; +#[cfg(feature = "tokio")] +pub type Runtime = tokio::runtime::Runtime; + +#[derive(Clone)] +pub enum RuntimeDispatch { + #[cfg(feature = "tokio")] + Tokio(Handle), + #[cfg(feature = "simulation")] + Simulation(sim::Handle), +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct RuntimeTimeout; + +impl fmt::Display for RuntimeTimeout { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("runtime operation timed out") + } +} + +impl std::error::Error for RuntimeTimeout {} + +impl RuntimeDispatch { + #[cfg(feature = "tokio")] + pub fn tokio(handle: Handle) -> Self { + Self::Tokio(handle) + } + + #[cfg(feature = "tokio")] + pub fn tokio_current() -> Self { + Self::tokio(Handle::current()) + } + + #[cfg(feature = "simulation")] + pub fn simulation(handle: sim::Handle) -> Self { + Self::Simulation(handle) + } + + #[cfg(feature = "simulation")] + pub fn simulation_current() -> Self { + Self::simulation(sim::Handle::current().expect("simulation runtime is not active on this thread")) + } + + pub fn spawn(&self, future: impl Future + Send + 'static) { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = future; + match self { + #[cfg(feature = "tokio")] + Self::Tokio(handle) => { + handle.spawn(future); + } + #[cfg(feature = "simulation")] + Self::Simulation(handle) => { + handle.spawn_on(sim::NodeId::MAIN, future).detach(); + } + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + _ => unreachable!("runtime dispatch has no enabled backend"), + } + } + + pub async fn spawn_blocking(&self, f: F) -> R + where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, + { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = &f; + match self { + #[cfg(feature = "tokio")] + Self::Tokio(_) => tokio::task::spawn_blocking(f) + .await + 
.unwrap_or_else(|e| match e.try_into_panic() { + Ok(panic_payload) => std::panic::resume_unwind(panic_payload), + Err(e) => panic!("Unexpected JoinError: {e}"), + }), + #[cfg(feature = "simulation")] + Self::Simulation(handle) => handle.spawn_on(sim::NodeId::MAIN, async move { f() }).await, + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + _ => unreachable!("runtime dispatch has no enabled backend"), + } + } + + pub async fn timeout( + &self, + timeout_after: Duration, + future: impl Future, + ) -> Result { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = (timeout_after, future); + match self { + #[cfg(feature = "tokio")] + Self::Tokio(_) => tokio::time::timeout(timeout_after, future) + .await + .map_err(|_| RuntimeTimeout), + #[cfg(feature = "simulation")] + Self::Simulation(_) => sim::time::timeout(timeout_after, future) + .await + .map_err(|_| RuntimeTimeout), + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + _ => unreachable!("runtime dispatch has no enabled backend"), + } + } +} + +#[cfg(feature = "tokio")] +pub fn current_handle_or_new_runtime() -> anyhow::Result<(Handle, Option)> { + if let Ok(handle) = Handle::try_current() { + return Ok((handle, None)); + } + + let runtime = Runtime::new()?; + Ok((runtime.handle().clone(), Some(runtime))) +} diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs new file mode 100644 index 00000000000..765b70f631b --- /dev/null +++ b/crates/runtime/src/sim/executor.rs @@ -0,0 +1,589 @@ +//! Minimal asynchronous executor adapted from madsim's `sim/task` loop. 
+ +use std::{ + cell::RefCell, + collections::BTreeMap, + fmt, + future::Future, + panic::AssertUnwindSafe, + pin::Pin, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Mutex, + }, + task::{Context, Poll}, + thread::{self, Thread}, + time::Duration, +}; + +use futures_util::FutureExt; + +use crate::sim::{ + rng::{enter_rng_context, DeterminismLog}, + system_thread::enter_simulation_thread, + time::{enter_time_context, TimeHandle}, + Rng, +}; + +type Runnable = async_task::Runnable; + +/// A unique identifier for a simulated node. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct NodeId(u64); + +impl NodeId { + pub const MAIN: Self = Self(0); +} + +impl fmt::Display for NodeId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +/// A small single-threaded runtime for DST's top-level future. +/// +/// futures are scheduled as runnables, the ready queue +/// is sampled by deterministic RNG, and pending execution without future events +/// is considered a test hang. 
+pub struct Runtime { + executor: Arc, +} + +impl Runtime { + pub fn new(seed: u64) -> anyhow::Result { + Ok(Self { + executor: Arc::new(Executor::new(seed)), + }) + } + + pub fn block_on(&mut self, future: F) -> F::Output { + let _handle_context = enter_handle_context(self.handle()); + self.executor.block_on(future) + } + + pub fn elapsed(&self) -> Duration { + self.executor.elapsed() + } + + pub fn handle(&self) -> Handle { + Handle { + executor: Arc::clone(&self.executor), + } + } + + pub fn create_node(&self) -> NodeId { + self.handle().create_node() + } + + pub fn pause(&self, node: NodeId) { + self.handle().pause(node); + } + + pub fn resume(&self, node: NodeId) { + self.handle().resume(node); + } + + pub fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.handle().spawn_on(node, future) + } + + /// Run a future twice with the same seed and fail if simulator choices diverge. + pub fn check_determinism(seed: u64, make_future: fn() -> F) -> F::Output + where + F: Future + 'static, + F::Output: Send + 'static, + { + Self::check_determinism_with(seed, make_future) + } + + /// Run a future twice with the same seed and fail if simulator choices diverge. 
+ pub fn check_determinism_with(seed: u64, make_future: M) -> F::Output + where + M: Fn() -> F + Clone + Send + 'static, + F: Future + 'static, + F::Output: Send + 'static, + { + let first = make_future.clone(); + let log = thread::spawn(move || { + let mut runtime = Runtime::new(seed).expect("failed to create simulation runtime"); + runtime.executor.enable_determinism_log(); + runtime.block_on(first()); + runtime + .executor + .take_determinism_log() + .expect("determinism log should be enabled") + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap(); + + thread::spawn(move || { + let mut runtime = Runtime::new(seed).expect("failed to create simulation runtime"); + runtime.executor.enable_determinism_check(log); + let output = runtime.block_on(make_future()); + runtime + .executor + .finish_determinism_check() + .unwrap_or_else(|err| panic!("{err}")); + output + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap() + } +} + +/// Cloneable access to the simulation executor. +#[derive(Clone)] +pub struct Handle { + executor: Arc, +} + +impl Handle { + pub fn current() -> Option { + current_handle() + } + + pub fn create_node(&self) -> NodeId { + self.executor.create_node() + } + + pub fn pause(&self, node: NodeId) { + self.executor.pause(node); + } + + pub fn resume(&self, node: NodeId) { + self.executor.resume(node); + } + + pub fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.executor.spawn_on(node, future) + } + + pub fn spawn_local_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + 'static, + F::Output: 'static, + { + self.executor.spawn_local_on(node, future) + } +} + +thread_local! 
{ + static CURRENT_HANDLE: RefCell> = RefCell::new(None); +} + +pub(crate) fn current_handle() -> Option { + CURRENT_HANDLE.with(|handle| handle.borrow().clone()) +} + +fn enter_handle_context(handle: Handle) -> HandleContextGuard { + let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); + HandleContextGuard { previous } +} + +struct HandleContextGuard { + previous: Option, +} + +impl Drop for HandleContextGuard { + fn drop(&mut self) { + CURRENT_HANDLE.with(|slot| { + *slot.borrow_mut() = self.previous.take(); + }); + } +} + +/// A spawned simulated task. +pub struct JoinHandle { + task: async_task::Task, +} + +impl JoinHandle { + pub fn detach(self) { + self.task.detach(); + } +} + +impl Future for JoinHandle { + type Output = T; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + Pin::new(&mut self.task).poll(cx) + } +} + +fn panic_with_seed(seed: u64, payload: Box) -> ! { + eprintln!("note: run with --seed {seed} to reproduce this error"); + std::panic::resume_unwind(payload); +} + +struct Executor { + queue: Receiver, + sender: Sender, + nodes: Mutex>>, + next_node: std::sync::atomic::AtomicU64, + rng: Arc>, + time: TimeHandle, +} + +impl Executor { + fn new(seed: u64) -> Self { + let queue = Queue::new(); + let mut nodes = BTreeMap::new(); + nodes.insert(NodeId::MAIN, Arc::new(NodeState::default())); + Self { + queue: queue.receiver(), + sender: queue.sender(), + nodes: Mutex::new(nodes), + next_node: std::sync::atomic::AtomicU64::new(1), + rng: Arc::new(Mutex::new(Rng::new(seed))), + time: TimeHandle::new(), + } + } + + fn elapsed(&self) -> Duration { + self.time.now() + } + + fn enable_determinism_log(&self) { + self.rng.lock().expect("sim rng poisoned").enable_determinism_log(); + } + + fn enable_determinism_check(&self, log: DeterminismLog) { + self.rng.lock().expect("sim rng poisoned").enable_determinism_check(log); + } + + fn take_determinism_log(&self) -> Option { + self.rng.lock().expect("sim rng 
poisoned").take_determinism_log() + } + + fn finish_determinism_check(&self) -> Result<(), String> { + self.rng.lock().expect("sim rng poisoned").finish_determinism_check() + } + + fn create_node(&self) -> NodeId { + let id = NodeId(self.next_node.fetch_add(1, Ordering::Relaxed)); + self.nodes + .lock() + .expect("nodes poisoned") + .insert(id, Arc::new(NodeState::default())); + id + } + + fn pause(&self, node: NodeId) { + self.node_state(node).paused.store(true, Ordering::Relaxed); + } + + fn resume(&self, node: NodeId) { + let state = self.node_state(node); + state.paused.store(false, Ordering::Relaxed); + + let mut paused = state.paused_queue.lock().expect("paused queue poisoned"); + for runnable in paused.drain(..) { + self.sender.send(runnable); + } + } + + fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.node_state(node); + + let sender = self.sender.clone(); + let (runnable, task) = async_task::Builder::new() + .metadata(node) + .spawn(move |_| future, move |runnable| sender.send(runnable)); + runnable.schedule(); + + JoinHandle { task } + } + + fn spawn_local_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + 'static, + F::Output: 'static, + { + self.node_state(node); + + let sender = self.sender.clone(); + let (runnable, task) = unsafe { + async_task::Builder::new() + .metadata(node) + .spawn_unchecked(move |_| future, move |runnable| sender.send(runnable)) + }; + runnable.schedule(); + + JoinHandle { task } + } + + #[track_caller] + fn block_on(&self, future: F) -> F::Output { + let _system_thread_context = enter_simulation_thread(); + let _rng_context = enter_rng_context(Arc::clone(&self.rng)); + let _time_context = enter_time_context(self.time.clone()); + let _waiter = WaiterGuard::new(&self.queue, thread::current()); + + let sender = self.sender.clone(); + let (runnable, task) = unsafe { + async_task::Builder::new() + .metadata(NodeId::MAIN) + 
.spawn_unchecked(move |_| future, move |runnable| sender.send(runnable)) + }; + runnable.schedule(); + + loop { + self.run_all_ready(); + if task.is_finished() { + return task.now_or_never().expect("finished task should resolve"); + } + + if self.time.wake_next_timer() { + continue; + } + + panic!("no runnable tasks; all simulated tasks are blocked"); + } + } + + fn run_all_ready(&self) { + while let Some(runnable) = self.queue.try_recv_random(&self.rng) { + let node = *runnable.metadata(); + let state = self.node_state(node); + if state.paused.load(Ordering::Relaxed) { + state.paused_queue.lock().expect("paused queue poisoned").push(runnable); + continue; + } + let result = std::panic::catch_unwind(AssertUnwindSafe(|| runnable.run())); + if let Err(payload) = result { + std::panic::resume_unwind(payload); + } + } + } + + fn node_state(&self, node: NodeId) -> Arc { + self.nodes + .lock() + .expect("nodes poisoned") + .get(&node) + .cloned() + .unwrap_or_else(|| panic!("unknown simulated node {node}")) + } +} + +#[derive(Clone, Default)] +struct NodeState { + paused: Arc, + paused_queue: Arc>>, +} + +pub async fn yield_now() { + YieldNow { yielded: false }.await +} + +struct YieldNow { + yielded: bool, +} + +impl Future for YieldNow { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if self.yielded { + Poll::Ready(()) + } else { + self.yielded = true; + cx.waker().wake_by_ref(); + Poll::Pending + } + } +} + +struct WaiterGuard<'a> { + receiver: &'a Receiver, +} + +impl<'a> WaiterGuard<'a> { + fn new(receiver: &'a Receiver, thread: Thread) -> Self { + receiver.set_waiter(Some(thread)); + Self { receiver } + } +} + +impl Drop for WaiterGuard<'_> { + fn drop(&mut self) { + self.receiver.set_waiter(None); + } +} + +struct Queue { + inner: Arc, +} + +#[derive(Clone)] +struct Sender { + inner: Arc, +} + +#[derive(Clone)] +struct Receiver { + inner: Arc, +} + +struct QueueInner { + queue: Mutex>, + waiter: Mutex>, +} + +impl Queue 
{ + fn new() -> Self { + Self { + inner: Arc::new(QueueInner { + queue: Mutex::new(Vec::new()), + waiter: Mutex::new(None), + }), + } + } + + fn sender(&self) -> Sender { + Sender { + inner: self.inner.clone(), + } + } + + fn receiver(&self) -> Receiver { + Receiver { + inner: self.inner.clone(), + } + } +} + +impl Sender { + fn send(&self, runnable: Runnable) { + self.inner.queue.lock().expect("run queue poisoned").push(runnable); + if let Some(thread) = self.inner.waiter.lock().expect("waiter poisoned").as_ref() { + thread.unpark(); + } + } +} + +impl Receiver { + fn set_waiter(&self, thread: Option) { + *self.inner.waiter.lock().expect("waiter poisoned") = thread; + } + + fn try_recv_random(&self, rng: &Mutex) -> Option { + let mut queue = self.inner.queue.lock().expect("run queue poisoned"); + if queue.is_empty() { + return None; + } + let idx = rng.lock().expect("rng poisoned").index(queue.len()); + Some(queue.swap_remove(idx)) + } +} + +#[cfg(test)] +mod tests { + use std::sync::{ + atomic::{AtomicBool, AtomicUsize, Ordering}, + Arc, + }; + + use super::*; + + #[test] + fn paused_node_does_not_run_until_resumed() { + let mut runtime = Runtime::new(1).unwrap(); + let node = runtime.create_node(); + runtime.pause(node); + + let runs = Arc::new(AtomicUsize::new(0)); + let task_runs = Arc::clone(&runs); + let task = runtime.spawn_on(node, async move { + task_runs.fetch_add(1, Ordering::SeqCst); + 7 + }); + + runtime.block_on(async { + yield_now().await; + }); + assert_eq!(runs.load(Ordering::SeqCst), 0); + + runtime.resume(node); + assert_eq!(runtime.block_on(task), 7); + assert_eq!(runs.load(Ordering::SeqCst), 1); + } + + #[test] + fn handle_can_spawn_onto_node_from_simulated_task() { + let mut runtime = Runtime::new(2).unwrap(); + let handle = runtime.handle(); + + let value = runtime.block_on(async move { + let node = handle.create_node(); + handle.spawn_on(node, async { 11 }).await + }); + + assert_eq!(value, 11); + } + + #[test] + fn 
current_handle_can_spawn_local_task_inside_runtime() { + assert!(Handle::current().is_none()); + + let mut runtime = Runtime::new(5).unwrap(); + let value = runtime.block_on(async { + let handle = Handle::current().expect("sim handle should be present inside block_on"); + let node = handle.create_node(); + let captured = std::rc::Rc::new(17); + handle + .spawn_local_on(node, async move { + yield_now().await; + *captured + }) + .await + }); + + assert_eq!(value, 17); + assert!(Handle::current().is_none()); + } + + #[test] + fn check_determinism_runs_future_twice() { + static CALLS: AtomicUsize = AtomicUsize::new(0); + CALLS.store(0, Ordering::SeqCst); + + let value = Runtime::check_determinism(3, || async { + CALLS.fetch_add(1, Ordering::SeqCst); + yield_now().await; + 13 + }); + + assert_eq!(value, 13); + assert_eq!(CALLS.load(Ordering::SeqCst), 2); + } + + #[test] + #[should_panic(expected = "non-determinism detected")] + fn check_determinism_rejects_different_scheduler_sequence() { + static FIRST_RUN: AtomicBool = AtomicBool::new(true); + FIRST_RUN.store(true, Ordering::SeqCst); + + Runtime::check_determinism(4, || async { + if FIRST_RUN.swap(false, Ordering::SeqCst) { + yield_now().await; + } + }); + } +} diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs new file mode 100644 index 00000000000..467903cf2b4 --- /dev/null +++ b/crates/runtime/src/sim/mod.rs @@ -0,0 +1,23 @@ +//! Local deterministic simulation runtime. +//! +//! This module is deliberately small, but its executor shape follows madsim's: +//! futures are scheduled as runnable tasks and the ready queue is sampled by a +//! deterministic RNG instead of being driven by a package-level async runtime. 
+ +mod executor; +mod rng; +mod system_thread; +pub mod time; + +use std::time::Duration; + +pub use executor::{yield_now, Handle, JoinHandle, NodeId, Runtime}; +pub use rng::{DecisionSource, Rng}; + +pub fn advance_time(duration: Duration) { + time::advance(duration); +} + +pub fn decision_source(seed: u64) -> DecisionSource { + DecisionSource::new(seed) +} diff --git a/crates/runtime/src/sim/rng.rs b/crates/runtime/src/sim/rng.rs new file mode 100644 index 00000000000..09afde03031 --- /dev/null +++ b/crates/runtime/src/sim/rng.rs @@ -0,0 +1,367 @@ +use std::{ + cell::{Cell, RefCell}, + ptr, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Mutex, OnceLock, + }, +}; + +const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; + +#[derive(Clone, Debug)] +pub struct Rng { + seed: u64, + state: u64, + log: Option>, + check: Option<(Vec, usize)>, +} + +impl Rng { + pub fn new(seed: u64) -> Self { + unsafe { getentropy(ptr::null_mut(), 0) }; + if !init_std_random_state(seed) { + tracing::warn!("failed to initialize std random state, std HashMap will not be deterministic"); + } + Self { + seed, + state: splitmix64(seed), + log: None, + check: None, + } + } + + pub fn next_u64(&mut self) -> u64 { + self.state = self.state.wrapping_add(GAMMA); + let value = splitmix64(self.state); + self.record_checkpoint(value); + value + } + + pub fn index(&mut self, len: usize) -> usize { + assert!(len > 0, "len must be non-zero"); + (self.next_u64() as usize) % len + } + + pub fn sample_probability(&mut self, probability: f64) -> bool { + probability_sample(self.next_u64(), probability) + } + + pub(crate) fn fill_bytes(&mut self, dest: &mut [u8]) { + for chunk in dest.chunks_mut(std::mem::size_of::()) { + let bytes = self.next_u64().to_ne_bytes(); + chunk.copy_from_slice(&bytes[..chunk.len()]); + } + } + + pub(crate) fn enable_determinism_log(&mut self) { + self.log = Some(Vec::new()); + self.check = None; + } + + pub(crate) fn enable_determinism_check(&mut self, log: DeterminismLog) { + self.check = 
Some((log.0, 0)); + self.log = None; + } + + pub(crate) fn take_determinism_log(&mut self) -> Option { + self.log + .take() + .or_else(|| self.check.take().map(|(log, _)| log)) + .map(DeterminismLog) + } + + pub(crate) fn finish_determinism_check(&self) -> Result<(), String> { + if let Some((log, consumed)) = &self.check + && *consumed != log.len() + { + return Err(format!( + "non-determinism detected for seed {}: consumed {consumed} of {} checkpoints", + self.seed, + log.len() + )); + } + Ok(()) + } + + fn record_checkpoint(&mut self, value: u64) { + if self.log.is_none() && self.check.is_none() { + return; + } + + let checkpoint = checksum(value); + if let Some(log) = &mut self.log { + log.push(checkpoint); + } + if let Some((expected, consumed)) = &mut self.check { + if expected.get(*consumed) != Some(&checkpoint) { + panic!( + "non-determinism detected for seed {} at checkpoint {consumed}", + self.seed + ); + } + *consumed += 1; + } + } +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub(crate) struct DeterminismLog(Vec); + +#[derive(Debug)] +pub struct DecisionSource { + state: AtomicU64, +} + +impl DecisionSource { + pub fn new(seed: u64) -> Self { + Self { + state: AtomicU64::new(splitmix64(seed)), + } + } + + pub fn sample_probability(&self, probability: f64) -> bool { + probability_sample(self.next_u64(), probability) + } + + fn next_u64(&self) -> u64 { + let state = self.state.fetch_add(GAMMA, Ordering::Relaxed).wrapping_add(GAMMA); + splitmix64(state) + } +} + +fn probability_sample(value: u64, probability: f64) -> bool { + if probability <= 0.0 { + return false; + } + if probability >= 1.0 { + return true; + } + + // Use the top 53 bits to build an exactly representable f64 in [0, 1). 
+ let unit = (value >> 11) as f64 * (1.0 / ((1u64 << 53) as f64)); + unit < probability +} + +fn splitmix64(mut x: u64) -> u64 { + x = x.wrapping_add(GAMMA); + x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + x ^ (x >> 31) +} + +fn checksum(value: u64) -> u8 { + value.to_ne_bytes().into_iter().fold(0, |acc, byte| acc ^ byte) +} + +thread_local! { + static CURRENT_RNG: RefCell>>> = const { RefCell::new(None) }; + static STD_RANDOM_SEED: Cell> = const { Cell::new(None) }; +} + +pub(crate) struct RngContextGuard { + previous: Option>>, +} + +pub(crate) fn enter_rng_context(rng: Arc>) -> RngContextGuard { + let previous = CURRENT_RNG.with(|current| current.replace(Some(rng))); + RngContextGuard { previous } +} + +impl Drop for RngContextGuard { + fn drop(&mut self) { + CURRENT_RNG.with(|current| { + current.replace(self.previous.take()); + }); + } +} + +fn init_std_random_state(seed: u64) -> bool { + STD_RANDOM_SEED.with(|slot| slot.set(Some(seed))); + let _ = std::collections::hash_map::RandomState::new(); + STD_RANDOM_SEED.with(|slot| slot.replace(None)).is_none() +} + +fn fill_from_seed(buf: *mut u8, buflen: usize, seed: u64) { + if buflen == 0 { + return; + } + let mut state = splitmix64(seed); + let buf = unsafe { std::slice::from_raw_parts_mut(buf, buflen) }; + for chunk in buf.chunks_mut(std::mem::size_of::()) { + state = state.wrapping_add(GAMMA); + let bytes = splitmix64(state).to_ne_bytes(); + chunk.copy_from_slice(&bytes[..chunk.len()]); + } +} + +fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { + CURRENT_RNG.with(|current| { + let Some(rng) = current.borrow().clone() else { + return false; + }; + if buflen == 0 { + return true; + } + let buf = unsafe { std::slice::from_raw_parts_mut(buf, buflen) }; + rng.lock().expect("sim rng poisoned").fill_bytes(buf); + true + }) +} + +/// Obtain random bytes through the simulation RNG when running inside the DST executor. 
+/// +/// This mirrors madsim's libc-level hook. It covers libc users and macOS +/// `CCRandomGenerateBytes`; crates that issue raw kernel syscalls can still +/// bypass it. +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { + #[cfg(target_os = "macos")] + let _ = flags; + + if let Some(seed) = STD_RANDOM_SEED.with(|slot| slot.replace(None)) { + fill_from_seed(buf, buflen, seed); + return buflen as isize; + } + if fill_from_current_rng(buf, buflen) { + return buflen as isize; + } + + #[cfg(target_os = "linux")] + { + type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize; + static GETRANDOM: OnceLock = OnceLock::new(); + let original = GETRANDOM.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getrandom"); + std::mem::transmute(ptr) + }); + unsafe { original(buf, buflen, flags) } + } + + #[cfg(target_os = "macos")] + { + type GetentropyFn = unsafe extern "C" fn(*mut u8, usize) -> libc::c_int; + static GETENTROPY: OnceLock = OnceLock::new(); + let original = GETENTROPY.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getentropy".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getentropy"); + std::mem::transmute(ptr) + }); + match unsafe { original(buf, buflen) } { + -1 => -1, + 0 => buflen as isize, + _ => unreachable!("unexpected getentropy return value"), + } + } + + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + { + let _ = (buf, buflen, flags); + compile_error!("unsupported OS for DST getrandom override"); + } +} + +/// Fill a buffer with random bytes through the same hook used by libc. 
+#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { + if buflen > 256 { + return -1; + } + match unsafe { getrandom(buf, buflen, 0) } { + -1 => -1, + _ => 0, + } +} + +/// macOS uses CommonCrypto for process randomness in newer Rust toolchains. +#[cfg(target_os = "macos")] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn CCRandomGenerateBytes(bytes: *mut u8, count: usize) -> i32 { + match unsafe { getrandom(bytes, count, 0) } { + -1 => -1, + _ => 0, + } +} + +#[cfg(test)] +mod tests { + use std::{collections::HashMap, sync::Arc}; + + use super::*; + + #[test] + fn rng_log_check_accepts_same_sequence() { + let mut first = Rng::new(10); + first.enable_determinism_log(); + let first_values = (0..8).map(|_| first.next_u64()).collect::>(); + let log = first.take_determinism_log().unwrap(); + + let mut second = Rng::new(10); + second.enable_determinism_check(log); + let second_values = (0..8).map(|_| second.next_u64()).collect::>(); + second.finish_determinism_check().unwrap(); + + assert_eq!(first_values, second_values); + } + + #[test] + fn decision_source_matches_rng_sequence() { + let source = DecisionSource::new(12); + let mut rng = Rng::new(12); + + for _ in 0..16 { + assert_eq!(source.next_u64(), rng.next_u64()); + } + } + + #[test] + #[should_panic(expected = "non-determinism detected")] + fn rng_log_check_rejects_different_sequence() { + let mut first = Rng::new(10); + first.enable_determinism_log(); + first.next_u64(); + let log = first.take_determinism_log().unwrap(); + + let mut second = Rng::new(11); + second.enable_determinism_check(log); + second.next_u64(); + } + + #[test] + fn getentropy_uses_current_sim_rng() { + let rng = Arc::new(Mutex::new(Rng::new(20))); + let _guard = enter_rng_context(Arc::clone(&rng)); + + let mut actual = [0u8; 24]; + unsafe { + assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); + } + + let mut expected_rng = Rng::new(20); + let mut expected = 
[0u8; 24]; + expected_rng.fill_bytes(&mut expected); + assert_eq!(actual, expected); + } + + #[test] + fn std_hashmap_order_is_seeded_for_runtime_thread() { + fn order_for(seed: u64) -> Vec<(u64, u64)> { + std::thread::spawn(move || { + let _rng = Rng::new(seed); + (0..12) + .map(|idx| (idx, idx)) + .collect::>() + .into_iter() + .collect() + }) + .join() + .unwrap() + } + + assert_eq!(order_for(30), order_for(30)); + } +} diff --git a/crates/runtime/src/sim/system_thread.rs b/crates/runtime/src/sim/system_thread.rs new file mode 100644 index 00000000000..f395a25442a --- /dev/null +++ b/crates/runtime/src/sim/system_thread.rs @@ -0,0 +1,64 @@ +//! Guard against creating OS threads from inside the simulator. + +use std::{cell::Cell, sync::OnceLock}; + +thread_local! { + static IN_SIMULATION: Cell = const { Cell::new(false) }; +} + +pub(crate) struct SimulationThreadGuard { + previous: bool, +} + +pub(crate) fn enter_simulation_thread() -> SimulationThreadGuard { + let previous = IN_SIMULATION.with(|state| state.replace(true)); + SimulationThreadGuard { previous } +} + +impl Drop for SimulationThreadGuard { + fn drop(&mut self) { + IN_SIMULATION.with(|state| { + state.set(self.previous); + }); + } +} + +fn in_simulation() -> bool { + IN_SIMULATION.with(Cell::get) +} + +/// Forbid creating system threads in simulation. 
+#[cfg(unix)] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int { + if in_simulation() { + eprintln!("attempt to spawn a system thread in simulation."); + eprintln!("note: use simulator tasks instead."); + return -1; + } + + type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int; + static PTHREAD_ATTR_INIT: OnceLock = OnceLock::new(); + let original = PTHREAD_ATTR_INIT.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original pthread_attr_init"); + std::mem::transmute(ptr) + }); + unsafe { original(attr) } +} + +#[cfg(test)] +mod tests { + use crate::sim; + + #[test] + #[cfg(unix)] + fn runtime_forbids_system_thread_spawn() { + let mut runtime = sim::Runtime::new(200).unwrap(); + runtime.block_on(async { + let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); + assert!(result.is_err()); + }); + } +} diff --git a/crates/runtime/src/sim/time.rs b/crates/runtime/src/sim/time.rs new file mode 100644 index 00000000000..2508b35b249 --- /dev/null +++ b/crates/runtime/src/sim/time.rs @@ -0,0 +1,343 @@ +//! Virtual time for the local simulation runtime. 
+ +use std::{ + cell::RefCell, + collections::BTreeMap, + fmt, + future::Future, + pin::Pin, + sync::{Arc, Mutex}, + task::{Context, Poll, Waker}, + time::Duration, +}; + +use futures::future::{select, Either}; + +#[derive(Clone, Debug)] +pub struct TimeHandle { + inner: Arc>, +} + +impl TimeHandle { + pub fn new() -> Self { + Self { + inner: Arc::new(Mutex::new(TimeState::default())), + } + } + + pub fn now(&self) -> Duration { + self.inner.lock().expect("sim time poisoned").now + } + + pub fn advance(&self, duration: Duration) { + if duration.is_zero() { + return; + } + + let wakers = { + let mut state = self.inner.lock().expect("sim time poisoned"); + state.now = state.now.saturating_add(duration); + state.take_due_wakers() + }; + wake_all(wakers); + } + + pub fn wake_next_timer(&self) -> bool { + let wakers = { + let mut state = self.inner.lock().expect("sim time poisoned"); + let Some(next_deadline) = state.timers.values().map(|timer| timer.deadline).min() else { + return false; + }; + if next_deadline > state.now { + state.now = next_deadline; + } + state.take_due_wakers() + }; + let woke = !wakers.is_empty(); + wake_all(wakers); + woke + } + + fn register_timer(&self, id: TimerId, deadline: Duration, waker: &Waker) { + let mut state = self.inner.lock().expect("sim time poisoned"); + state.timers.insert( + id, + TimerEntry { + deadline, + waker: waker.clone(), + }, + ); + } + + fn cancel_timer(&self, id: TimerId) { + self.inner.lock().expect("sim time poisoned").timers.remove(&id); + } + + fn next_timer_id(&self) -> TimerId { + let mut state = self.inner.lock().expect("sim time poisoned"); + let id = TimerId(state.next_timer_id); + state.next_timer_id = state.next_timer_id.saturating_add(1); + id + } +} + +impl Default for TimeHandle { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug, Default)] +struct TimeState { + now: Duration, + next_timer_id: u64, + timers: BTreeMap, +} + +impl TimeState { + fn take_due_wakers(&mut self) -> Vec { + let due 
= self + .timers + .iter() + .filter_map(|(id, timer)| (timer.deadline <= self.now).then_some(*id)) + .collect::>(); + due.into_iter() + .filter_map(|id| self.timers.remove(&id).map(|timer| timer.waker)) + .collect() + } +} + +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +struct TimerId(u64); + +#[derive(Debug)] +struct TimerEntry { + deadline: Duration, + waker: Waker, +} + +thread_local! { + static CURRENT_TIME: RefCell> = const { RefCell::new(None) }; +} + +pub struct TimeContextGuard { + previous: Option, +} + +pub fn enter_time_context(handle: TimeHandle) -> TimeContextGuard { + let previous = CURRENT_TIME.with(|current| current.replace(Some(handle))); + TimeContextGuard { previous } +} + +pub fn try_current_handle() -> Option { + CURRENT_TIME.with(|current| current.borrow().clone()) +} + +pub fn now() -> Duration { + try_current_handle().map(|handle| handle.now()).unwrap_or_default() +} + +pub fn advance(duration: Duration) { + if let Some(handle) = try_current_handle() { + handle.advance(duration); + } +} + +pub fn sleep(duration: Duration) -> Sleep { + Sleep { + duration, + state: SleepState::Unregistered, + } +} + +pub async fn timeout(duration: Duration, future: impl Future) -> Result { + futures::pin_mut!(future); + let sleep = sleep(duration); + futures::pin_mut!(sleep); + + match select(future, sleep).await { + Either::Left((output, _)) => Ok(output), + Either::Right(((), _)) => Err(TimeoutElapsed { duration }), + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct TimeoutElapsed { + duration: Duration, +} + +impl TimeoutElapsed { + pub fn duration(self) -> Duration { + self.duration + } +} + +impl fmt::Display for TimeoutElapsed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "simulated timeout elapsed after {:?}", self.duration) + } +} + +impl std::error::Error for TimeoutElapsed {} + +impl Drop for TimeContextGuard { + fn drop(&mut self) { + CURRENT_TIME.with(|current| { + 
current.replace(self.previous.take()); + }); + } +} + +pub struct Sleep { + duration: Duration, + state: SleepState, +} + +enum SleepState { + Unregistered, + Registered { + handle: TimeHandle, + id: TimerId, + deadline: Duration, + }, + Done, +} + +impl Future for Sleep { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if matches!(self.state, SleepState::Done) { + return Poll::Ready(()); + } + + if matches!(self.state, SleepState::Unregistered) { + let handle = try_current_handle().expect("sim::time::sleep polled outside sim runtime"); + let deadline = handle.now().saturating_add(self.duration); + let id = handle.next_timer_id(); + self.state = SleepState::Registered { handle, id, deadline }; + } + + let SleepState::Registered { handle, id, deadline } = &self.state else { + unreachable!("sleep state should be registered or done"); + }; + + if handle.now() >= *deadline { + let handle = handle.clone(); + let id = *id; + handle.cancel_timer(id); + self.state = SleepState::Done; + Poll::Ready(()) + } else { + handle.register_timer(*id, *deadline, cx.waker()); + Poll::Pending + } + } +} + +impl Drop for Sleep { + fn drop(&mut self) { + if let SleepState::Registered { handle, id, .. 
} = &self.state { + handle.cancel_timer(*id); + } + } +} + +fn wake_all(wakers: Vec) { + for waker in wakers { + waker.wake(); + } +} + +#[cfg(test)] +mod tests { + use std::{ + sync::{Arc, Mutex}, + time::Duration, + }; + + use crate::sim; + + #[test] + fn sleep_fast_forwards_virtual_time() { + let mut runtime = sim::Runtime::new(101).unwrap(); + + runtime.block_on(async { + assert_eq!(super::now(), Duration::ZERO); + super::sleep(Duration::from_millis(5)).await; + assert_eq!(super::now(), Duration::from_millis(5)); + }); + } + + #[test] + fn shorter_timer_wakes_first() { + let mut runtime = sim::Runtime::new(102).unwrap(); + let handle = runtime.handle(); + let order = Arc::new(Mutex::new(Vec::new())); + + runtime.block_on({ + let order = Arc::clone(&order); + async move { + let slow_order = Arc::clone(&order); + let slow = handle.spawn_on(sim::NodeId::MAIN, async move { + super::sleep(Duration::from_millis(10)).await; + slow_order.lock().expect("order poisoned").push(10); + }); + + let fast_order = Arc::clone(&order); + let fast = handle.spawn_on(sim::NodeId::MAIN, async move { + super::sleep(Duration::from_millis(3)).await; + fast_order.lock().expect("order poisoned").push(3); + }); + + fast.await; + slow.await; + } + }); + + assert_eq!(*order.lock().expect("order poisoned"), vec![3, 10]); + assert_eq!(runtime.elapsed(), Duration::from_millis(10)); + } + + #[test] + fn explicit_advance_moves_virtual_time() { + let mut runtime = sim::Runtime::new(103).unwrap(); + + runtime.block_on(async { + super::advance(Duration::from_millis(7)); + assert_eq!(super::now(), Duration::from_millis(7)); + }); + } + + #[test] + fn timeout_returns_future_output_before_deadline() { + let mut runtime = sim::Runtime::new(104).unwrap(); + + let output = runtime.block_on(async { + super::timeout(Duration::from_millis(10), async { + super::sleep(Duration::from_millis(3)).await; + 9 + }) + .await + }); + + assert_eq!(output, Ok(9)); + assert_eq!(runtime.elapsed(), 
Duration::from_millis(3)); + } + + #[test] + fn timeout_expires_at_virtual_deadline() { + let mut runtime = sim::Runtime::new(105).unwrap(); + + let output = runtime.block_on(async { + super::timeout(Duration::from_millis(4), async { + super::sleep(Duration::from_millis(20)).await; + 9 + }) + .await + }); + + assert_eq!(output.unwrap_err().duration(), Duration::from_millis(4)); + assert_eq!(runtime.elapsed(), Duration::from_millis(4)); + } +} diff --git a/crates/standalone/Cargo.toml b/crates/standalone/Cargo.toml index 180b3a60b4c..3bc7335625a 100644 --- a/crates/standalone/Cargo.toml +++ b/crates/standalone/Cargo.toml @@ -54,7 +54,7 @@ serde_json.workspace = true sled.workspace = true socket2.workspace = true thiserror.workspace = true -tokio.workspace = true +tokio = { workspace = true, features = ["full"] } tower-http.workspace = true toml.workspace = true tracing = { workspace = true, features = ["release_max_level_debug"] } diff --git a/crates/standalone/src/subcommands/start.rs b/crates/standalone/src/subcommands/start.rs index b407372aa34..ad1e02e788b 100644 --- a/crates/standalone/src/subcommands/start.rs +++ b/crates/standalone/src/subcommands/start.rs @@ -1,12 +1,18 @@ +#[cfg(not(simulation))] use netstat2::{get_sockets_info, AddressFamilyFlags, ProtocolFlags, ProtocolSocketInfo, TcpState}; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::identity::IdentityRoutes; +#[cfg(not(simulation))] use spacetimedb_pg::pg_server; +#[cfg(not(simulation))] use std::io::{self, Write}; +#[cfg(not(simulation))] use std::net::IpAddr; use std::sync::Arc; use crate::{StandaloneEnv, StandaloneOptions}; use anyhow::Context; +#[cfg(not(simulation))] use axum::extract::DefaultBodyLimit; use clap::ArgAction::SetTrue; use clap::{Arg, ArgMatches}; @@ -15,11 +21,14 @@ use spacetimedb::db::{self, Storage}; use spacetimedb::startup::{self, TracingOptions}; use spacetimedb::util::jobs::JobCores; use spacetimedb::worker_metrics; +#[cfg(not(simulation))] use 
spacetimedb_client_api::routes::database::DatabaseRoutes; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::router; use spacetimedb_client_api::routes::subscribe::WebSocketOptions; use spacetimedb_paths::cli::{PrivKeyPath, PubKeyPath}; use spacetimedb_paths::server::{ConfigToml, ServerDataDir}; +#[cfg(not(simulation))] use tokio::net::TcpListener; pub fn cli() -> clap::Command { @@ -111,6 +120,7 @@ impl ConfigFile { pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { let listen_addr = args.get_one::("listen_addr").unwrap(); let pg_port = args.get_one::("pg_port"); + #[cfg(not(simulation))] let non_interactive = args.get_flag("non_interactive"); let cert_dir = args.get_one::("jwt_key_dir"); let certs = Option::zip( @@ -197,13 +207,26 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { ); worker_metrics::spawn_page_pool_stats(listen_addr.clone(), ctx.page_pool().clone()); worker_metrics::spawn_bsatn_rlb_pool_stats(listen_addr.clone(), ctx.bsatn_rlb_pool().clone()); + #[cfg(simulation)] + { + let _ = (pg_port, ctx, listen_addr); + anyhow::bail!("standalone start server mode is not supported under simulation"); + } + + #[cfg(not(simulation))] let mut db_routes = DatabaseRoutes::default(); - db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); - db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); - db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); + #[cfg(not(simulation))] + { + db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); + db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); + db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); + } + #[cfg(not(simulation))] let extra = axum::Router::new().nest("/health", spacetimedb_client_api::routes::health::router()); + #[cfg(not(simulation))] let service = router(&ctx, db_routes, 
IdentityRoutes::default(), extra).with_state(ctx.clone()); + #[cfg(not(simulation))] // Check if the requested port is available on both IPv4 and IPv6. // If not, offer to find an available port by incrementing (unless non-interactive). let listen_addr = if let Some((host, port_str)) = listen_addr.rsplit_once(':') { @@ -249,40 +272,44 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { listen_addr.to_string() }; - let tcp = TcpListener::bind(&listen_addr).await.context(format!( - "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" - ))?; - socket2::SockRef::from(&tcp).set_nodelay(true)?; - log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); - - if let Some(pg_port) = pg_port { - let server_addr = listen_addr.split(':').next().unwrap(); - let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( - "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" + #[cfg(not(simulation))] + { + let tcp = TcpListener::bind(&listen_addr).await.context(format!( + "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" ))?; - - let notify = Arc::new(tokio::sync::Notify::new()); - let shutdown_notify = notify.clone(); - tokio::select! 
{ - _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, - _ = axum::serve(tcp, service).with_graceful_shutdown(async move { - shutdown_notify.notified().await; - }) => {}, - _ = tokio::signal::ctrl_c() => { - println!("Shutting down servers..."); - notify.notify_waiters(); // Notify all tasks + socket2::SockRef::from(&tcp).set_nodelay(true)?; + log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); + + if let Some(pg_port) = pg_port { + let server_addr = listen_addr.split(':').next().unwrap(); + let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( + "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" + ))?; + + let notify = Arc::new(tokio::sync::Notify::new()); + let shutdown_notify = notify.clone(); + tokio::select! { + _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, + _ = axum::serve(tcp, service).with_graceful_shutdown(async move { + shutdown_notify.notified().await; + }) => {}, + _ = tokio::signal::ctrl_c() => { + println!("Shutting down servers..."); + notify.notify_waiters(); // Notify all tasks + } } + } else { + log::warn!("PostgreSQL wire protocol server disabled"); + axum::serve(tcp, service) + .with_graceful_shutdown(async { + tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); + log::info!("Shutting down server..."); + }) + .await?; } - } else { - log::warn!("PostgreSQL wire protocol server disabled"); - axum::serve(tcp, service) - .with_graceful_shutdown(async { - tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); - log::info!("Shutting down server..."); - }) - .await?; } + #[cfg(not(simulation))] Ok(()) } @@ -301,6 +328,7 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { /// Note: There is a small race condition between this check and the actual bind - /// another process could grab the port in between. 
This is unlikely in practice /// and the actual bind will fail with a clear error if it happens. +#[cfg(not(simulation))] pub fn is_port_available(host: &str, port: u16) -> bool { let requested = match parse_host(host) { Some(r) => r, @@ -335,11 +363,13 @@ pub fn is_port_available(host: &str, port: u16) -> bool { } #[derive(Debug, Clone, Copy)] +#[cfg(not(simulation))] enum RequestedHost { Localhost, Ip(IpAddr), } +#[cfg(not(simulation))] fn parse_host(host: &str) -> Option { let host = host.trim(); @@ -353,6 +383,7 @@ fn parse_host(host: &str) -> Option { host.parse::().ok().map(RequestedHost::Ip) } +#[cfg(not(simulation))] fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { match requested { RequestedHost::Localhost => match listener_addr { @@ -423,6 +454,7 @@ fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { /// Find an available port starting from the requested port. /// Returns the first port that is available on both IPv4 and IPv6. +#[cfg(not(simulation))] fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Option { for offset in 0..max_attempts { let port = requested_port.saturating_add(offset); @@ -437,6 +469,7 @@ fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Op } /// Prompt the user with a yes/no question. Returns true if they answer yes. 
+#[cfg(not(simulation))] fn prompt_yes_no(question: &str) -> bool { print!("{} [y/N] ", question); io::stdout().flush().ok(); From c83ed2e99035e8d5eed1459d422cda047b18de59 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 20:41:16 +0530 Subject: [PATCH 38/74] LockedFsRepo --- crates/commitlog/src/lib.rs | 30 +++++++-- crates/commitlog/src/repo/mod.rs | 10 +++ crates/durability/src/imp/local.rs | 105 +++++++++++++++++++++++------ 3 files changed, 117 insertions(+), 28 deletions(-) diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index d80c1fb00b7..4b5727bc64c 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -6,7 +6,7 @@ use std::{ }; use log::trace; -use repo::{fs::OnNewSegmentFn, Repo}; +use repo::{fs::OnNewSegmentFn, Repo, RepoWithSizeOnDisk}; use spacetimedb_paths::server::CommitLogDir; pub mod commit; @@ -188,11 +188,6 @@ impl Commitlog { Self::open_with_repo(repo::Fs::new(root, on_new_segment)?, opts) } - /// Determine the size on disk of this commitlog. - pub fn size_on_disk(&self) -> io::Result { - let inner = self.inner.read().unwrap(); - inner.repo.size_on_disk() - } } impl Commitlog @@ -210,6 +205,29 @@ where inner: RwLock::new(inner), }) } +} + +impl Commitlog +where + R: RepoWithSizeOnDisk, +{ + /// Determine the size on disk of this commitlog. + pub fn size_on_disk(&self) -> io::Result { + let inner = self.inner.read().unwrap(); + inner.repo.size_on_disk() + } +} + +impl RepoWithSizeOnDisk for repo::Fs { + fn size_on_disk(&self) -> io::Result { + Self::size_on_disk(self) + } +} + +impl Commitlog +where + R: Repo, +{ /// Determine the maximum transaction offset considered durable. 
/// diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 358936c3c2a..0efa173f8f6 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -144,6 +144,11 @@ pub trait Repo: Clone + fmt::Display { } } +/// Capability trait for repos that can report storage usage. +pub trait RepoWithSizeOnDisk: Repo { + fn size_on_disk(&self) -> io::Result; +} + /// Marker for repos that do not require an external lock file. /// /// Durability implementations can use this to expose repo-backed opening @@ -152,6 +157,11 @@ pub trait Repo: Clone + fmt::Display { pub trait RepoWithoutLockFile: Repo {} impl RepoWithoutLockFile for &T {} +impl RepoWithSizeOnDisk for &T { + fn size_on_disk(&self) -> io::Result { + T::size_on_disk(self) + } +} #[cfg(any(test, feature = "test"))] impl RepoWithoutLockFile for Memory {} diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 5cc03099ab6..ab5f44217b8 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -14,7 +14,7 @@ use scopeguard::ScopeGuard; use spacetimedb_commitlog::{ error, payload::Txdata, - repo::{Fs, Repo, RepoWithoutLockFile}, + repo::{Fs, Repo, RepoWithSizeOnDisk, RepoWithoutLockFile}, Commit, Commitlog, Decoder, Encode, Transaction, }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; @@ -88,7 +88,7 @@ pub enum OpenError { /// /// Note, however, that instantiating `T` to a different type may require to /// change the log format version! -pub struct Local +pub struct Local where R: Repo, { @@ -114,7 +114,82 @@ where actor: Mutex>>, } -impl Local { +/// Commitlog repo backed by [`Fs`] and protected by a [`LockedFile`]. 
+#[derive(Clone, Debug)] +pub struct LockedFsRepo { + repo: Fs, + #[allow(unused)] + lock: Arc, +} + +impl LockedFsRepo { + pub fn open(replica_dir: ReplicaDir, on_new_segment: Option>) -> Result { + // We use the `db.lock` file for historical reasons and to keep + // compatibility with existing standalone layouts. + let lock = LockedFile::lock(replica_dir.0.join("db.lock")).map(Arc::new)?; + let repo = Fs::new(replica_dir.commit_log(), on_new_segment)?; + Ok(Self { repo, lock }) + } +} + +impl std::fmt::Display for LockedFsRepo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.repo.fmt(f) + } +} + +impl Repo for LockedFsRepo { + type SegmentWriter = ::SegmentWriter; + type SegmentReader = ::SegmentReader; + + fn create_segment(&self, offset: u64, header: spacetimedb_commitlog::segment::Header) -> io::Result { + self.repo.create_segment(offset, header) + } + + fn open_segment_reader(&self, offset: u64) -> io::Result { + self.repo.open_segment_reader(offset) + } + + fn open_segment_writer(&self, offset: u64) -> io::Result { + self.repo.open_segment_writer(offset) + } + + fn segment_file_path(&self, offset: u64) -> Option { + self.repo.segment_file_path(offset) + } + + fn remove_segment(&self, offset: u64) -> io::Result<()> { + self.repo.remove_segment(offset) + } + + fn compress_segment(&self, offset: u64) -> io::Result<()> { + self.repo.compress_segment(offset) + } + + fn existing_offsets(&self) -> io::Result> { + self.repo.existing_offsets() + } + + fn create_offset_index(&self, offset: TxOffset, cap: u64) -> io::Result { + self.repo.create_offset_index(offset, cap) + } + + fn remove_offset_index(&self, offset: TxOffset) -> io::Result<()> { + self.repo.remove_offset_index(offset) + } + + fn get_offset_index(&self, offset: TxOffset) -> io::Result { + self.repo.get_offset_index(offset) + } +} + +impl RepoWithSizeOnDisk for LockedFsRepo { + fn size_on_disk(&self) -> io::Result { + self.repo.size_on_disk() + } +} + +impl Local { /// 
Create a [`Local`] instance at the `replica_dir`. /// /// `replica_dir` must already exist. @@ -130,17 +205,9 @@ impl Local { on_new_segment: Option>, ) -> Result { info!("open local durability"); - - // We could just place a lock on the commitlog directory, - // yet for backwards-compatibility, we keep using the `db.lock` file. - let lock = LockedFile::lock(replica_dir.0.join("db.lock"))?; - - let clog = Arc::new(Commitlog::open( - replica_dir.commit_log(), - opts.commitlog, - on_new_segment, - )?); - Self::open_inner(clog, rt, opts, Some(lock)) + let repo = LockedFsRepo::open(replica_dir, on_new_segment)?; + let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); + Self::open_inner(clog, rt, opts) } } @@ -153,7 +220,6 @@ where clog: Arc, R>>, rt: tokio::runtime::Handle, opts: Options, - lock: Option, ) -> Result { let queue_capacity = opts.queue_capacity(); let (queue, txdata_rx) = async_channel::bounded(queue_capacity); @@ -168,8 +234,6 @@ where queue_depth: queue_depth.clone(), batch_capacity: opts.batch_capacity, - - lock, } .run(txdata_rx), ); @@ -198,7 +262,7 @@ where pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { info!("open local durability"); let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); - Self::open_inner(clog, rt, opts, None) + Self::open_inner(clog, rt, opts) } } @@ -229,7 +293,7 @@ where } } -impl Local { +impl Local { /// Get the size on disk of the underlying [`Commitlog`]. 
pub fn size_on_disk(&self) -> io::Result { self.clog.size_on_disk() @@ -246,9 +310,6 @@ where queue_depth: Arc, batch_capacity: NonZeroUsize, - - #[allow(unused)] - lock: Option, } impl Actor From 813e418bc3929a48522cc4db18a0c65cdad0e86d Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 21:13:36 +0530 Subject: [PATCH 39/74] comments --- crates/commitlog/src/lib.rs | 5 ++--- crates/commitlog/src/repo/mod.rs | 8 ++++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index 4b5727bc64c..9e640733613 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -187,7 +187,6 @@ impl Commitlog { } Self::open_with_repo(repo::Fs::new(root, on_new_segment)?, opts) } - } impl Commitlog @@ -228,7 +227,6 @@ impl Commitlog where R: Repo, { - /// Determine the maximum transaction offset considered durable. /// /// The offset is `None` if the log hasn't been flushed to disk yet. @@ -423,8 +421,9 @@ where } } -impl Commitlog +impl Commitlog where + T: Encode, R: Repo, { /// Write `transactions` to the log. diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 0efa173f8f6..4bbf72a97f8 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -156,8 +156,12 @@ pub trait RepoWithSizeOnDisk: Repo { /// violate single-writer safety. 
pub trait RepoWithoutLockFile: Repo {} -impl RepoWithoutLockFile for &T {} -impl RepoWithSizeOnDisk for &T { +impl RepoWithoutLockFile for &T where T: RepoWithoutLockFile {} + +impl RepoWithSizeOnDisk for &T +where + T: RepoWithSizeOnDisk, +{ fn size_on_disk(&self) -> io::Result { T::size_on_disk(self) } From 5946261a2617e7a95494d291f2d04333c2bb995e Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 21:25:17 +0530 Subject: [PATCH 40/74] cleanup --- crates/commitlog/src/repo/mod.rs | 13 ------------ crates/durability/src/imp/local.rs | 34 ++++++++++++++++-------------- 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 4bbf72a97f8..5e1b313e766 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -149,15 +149,6 @@ pub trait RepoWithSizeOnDisk: Repo { fn size_on_disk(&self) -> io::Result; } -/// Marker for repos that do not require an external lock file. -/// -/// Durability implementations can use this to expose repo-backed opening -/// only for storage backends where skipping the filesystem `db.lock` cannot -/// violate single-writer safety. 
-pub trait RepoWithoutLockFile: Repo {} - -impl RepoWithoutLockFile for &T where T: RepoWithoutLockFile {} - impl RepoWithSizeOnDisk for &T where T: RepoWithSizeOnDisk, @@ -166,10 +157,6 @@ where T::size_on_disk(self) } } - -#[cfg(any(test, feature = "test"))] -impl RepoWithoutLockFile for Memory {} - impl Repo for &T { type SegmentWriter = T::SegmentWriter; type SegmentReader = T::SegmentReader; diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index ab5f44217b8..90a103ae91d 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -11,10 +11,12 @@ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, trace, warn}; use scopeguard::ScopeGuard; +#[cfg(any(test, feature = "test"))] +use spacetimedb_commitlog::repo::Memory; use spacetimedb_commitlog::{ error, payload::Txdata, - repo::{Fs, Repo, RepoWithSizeOnDisk, RepoWithoutLockFile}, + repo::{Fs, Repo, RepoWithSizeOnDisk}, Commit, Commitlog, Decoder, Encode, Transaction, }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; @@ -142,7 +144,11 @@ impl Repo for LockedFsRepo { type SegmentWriter = ::SegmentWriter; type SegmentReader = ::SegmentReader; - fn create_segment(&self, offset: u64, header: spacetimedb_commitlog::segment::Header) -> io::Result { + fn create_segment( + &self, + offset: u64, + header: spacetimedb_commitlog::segment::Header, + ) -> io::Result { self.repo.create_segment(offset, header) } @@ -170,7 +176,11 @@ impl Repo for LockedFsRepo { self.repo.existing_offsets() } - fn create_offset_index(&self, offset: TxOffset, cap: u64) -> io::Result { + fn create_offset_index( + &self, + offset: TxOffset, + cap: u64, + ) -> io::Result { self.repo.create_offset_index(offset, cap) } @@ -216,6 +226,11 @@ where T: Encode + Send + Sync + 'static, R: Repo + Send + Sync + 'static, { + pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { + info!("open local durability"); + 
let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); + Self::open_inner(clog, rt, opts) + } fn open_inner( clog: Arc, R>>, rt: tokio::runtime::Handle, @@ -253,19 +268,6 @@ where } } -impl Local -where - T: Encode + Send + Sync + 'static, - R: RepoWithoutLockFile + Send + Sync + 'static, -{ - /// Create a [`Local`] instance backed by the provided commitlog repo. - pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { - info!("open local durability"); - let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); - Self::open_inner(clog, rt, opts) - } -} - impl Local where T: Send + Sync + 'static, From 2104ced1fa78f72dfe7e660ad2cd6ac485a70186 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 11 May 2026 15:38:28 +0530 Subject: [PATCH 41/74] lint Signed-off-by: Shubham Mishra --- crates/durability/src/imp/local.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 65f7499b79f..5b3124068f1 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -11,8 +11,6 @@ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, trace, warn}; use scopeguard::ScopeGuard; -#[cfg(any(test, feature = "test"))] -use spacetimedb_commitlog::repo::Memory; use spacetimedb_commitlog::{ error, payload::Txdata, From fc2e146d6cd28e8424027f944e92017d53bd54e1 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 11 May 2026 17:29:09 +0530 Subject: [PATCH 42/74] make sim module mostly non_std --- Cargo.lock | 2 +- crates/core/src/db/durability.rs | 4 +- crates/core/src/db/persistence.rs | 18 +- crates/core/src/db/relational_db.rs | 42 +- crates/core/src/db/snapshot.rs | 29 +- crates/core/src/runtime.rs | 3 +- .../subscription/module_subscription_actor.rs | 2 +- crates/durability/src/imp/local.rs | 53 +-- crates/durability/tests/io/fallocate.rs | 2 +- crates/runtime/Cargo.toml | 5 +- 
crates/runtime/README.md | 255 ++++------ crates/runtime/src/adapter/mod.rs | 5 + crates/runtime/src/adapter/sim_std.rs | 361 +++++++++++++++ crates/runtime/src/adapter/tokio.rs | 11 + crates/runtime/src/lib.rs | 40 +- crates/runtime/src/sim/buggify.rs | 51 ++ crates/runtime/src/sim/config.rs | 16 + crates/runtime/src/sim/executor.rs | 389 +++++++++------- crates/runtime/src/sim/mod.rs | 17 +- crates/runtime/src/sim/rng.rs | 437 +++++++----------- crates/runtime/src/sim/system_thread.rs | 64 --- crates/runtime/src/sim/time.rs | 343 -------------- crates/runtime/src/sim/time/mod.rs | 297 ++++++++++++ crates/runtime/src/sim/time/sleep.rs | 97 ++++ crates/runtime/tests/sim_e2e.rs | 108 +++++ 25 files changed, 1532 insertions(+), 1119 deletions(-) create mode 100644 crates/runtime/src/adapter/mod.rs create mode 100644 crates/runtime/src/adapter/sim_std.rs create mode 100644 crates/runtime/src/adapter/tokio.rs create mode 100644 crates/runtime/src/sim/buggify.rs create mode 100644 crates/runtime/src/sim/config.rs delete mode 100644 crates/runtime/src/sim/system_thread.rs delete mode 100644 crates/runtime/src/sim/time.rs create mode 100644 crates/runtime/src/sim/time/mod.rs create mode 100644 crates/runtime/src/sim/time/sleep.rs create mode 100644 crates/runtime/tests/sim_e2e.rs diff --git a/Cargo.lock b/Cargo.lock index a0193647eae..a3e768f96e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8470,11 +8470,11 @@ dependencies = [ name = "spacetimedb-runtime" version = "2.2.0" dependencies = [ - "anyhow", "async-task", "futures", "futures-util", "libc", + "spin", "tokio", "tracing", ] diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index 3a466d53eb6..6d3b814a55f 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -10,7 +10,7 @@ use spacetimedb_durability::Transaction; use spacetimedb_lib::Identity; use spacetimedb_sats::ProductValue; -use crate::{db::persistence::Durability, runtime::RuntimeDispatch}; +use 
crate::{db::persistence::Durability, runtime::Runtime}; pub(super) fn request_durability( durability: &Durability, @@ -31,7 +31,7 @@ pub(super) fn request_durability( })); } -pub(super) fn spawn_close(durability: Arc, runtime: &RuntimeDispatch, database_identity: Identity) { +pub(super) fn spawn_close(durability: Arc, runtime: &Runtime, database_identity: Identity) { let label = format!("[{database_identity}]"); let runtime = runtime.clone(); runtime.clone().spawn(async move { diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index 83d58befb06..cd69b2d82ad 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -6,7 +6,7 @@ use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; use spacetimedb_snapshot::DynSnapshotRepo; -use crate::{messages::control_db::Database, runtime::RuntimeDispatch, util::asyncify}; +use crate::{messages::control_db::Database, runtime::Runtime, util::asyncify}; use super::{ relational_db::{self, Txdata}, @@ -42,7 +42,7 @@ pub struct Persistence { /// this type. pub snapshots: Option, /// Runtime onto which durability-related tasks shall be spawned. 
- pub runtime: RuntimeDispatch, + pub runtime: Runtime, } impl Persistence { @@ -53,14 +53,14 @@ impl Persistence { snapshots: Option, runtime: tokio::runtime::Handle, ) -> Self { - Self::new_with_runtime(durability, disk_size, snapshots, RuntimeDispatch::tokio(runtime)) + Self::new_with_runtime(durability, disk_size, snapshots, Runtime::tokio(runtime)) } pub fn new_with_runtime( durability: impl spacetimedb_durability::Durability + 'static, disk_size: impl Fn() -> io::Result + Send + Sync + 'static, snapshots: Option, - runtime: RuntimeDispatch, + runtime: Runtime, ) -> Self { Self { durability: Arc::new(durability), @@ -100,7 +100,7 @@ impl Persistence { Option>, Option, Option, - Option, + Option, ) { this.map( |Self { @@ -158,11 +158,7 @@ impl PersistenceProvider for LocalPersistenceProvider { asyncify(move || relational_db::open_snapshot_repo(snapshot_dir, database_identity, replica_id)) .await .map(|repo| { - SnapshotWorker::new_with_repository( - repo, - snapshot::Compression::Enabled, - RuntimeDispatch::tokio_current(), - ) + SnapshotWorker::new_with_repository(repo, snapshot::Compression::Enabled, Runtime::tokio_current()) })?; let (durability, disk_size) = relational_db::local_durability(replica_dir, Some(&snapshot_worker)).await?; @@ -177,7 +173,7 @@ impl PersistenceProvider for LocalPersistenceProvider { durability, disk_size, snapshots: Some(snapshot_worker), - runtime: RuntimeDispatch::tokio_current(), + runtime: Runtime::tokio_current(), }) } } diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 57c7cde59cc..b576b1e6c14 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -1,7 +1,7 @@ use crate::db::durability::{request_durability, spawn_close as spawn_durability_close}; use crate::db::MetricsRecorderQueue; use crate::error::{DBError, RestoreSnapshotError}; -use crate::runtime::RuntimeDispatch; +use crate::runtime::Runtime; use crate::subscription::ExecutionCounters; 
use crate::util::asyncify; use crate::worker_metrics::WORKER_METRICS; @@ -41,9 +41,9 @@ use spacetimedb_lib::db::raw_def::v9::{btree, RawModuleDefV9Builder, RawSql}; use spacetimedb_lib::st_var::StVarValue; use spacetimedb_lib::ConnectionId; use spacetimedb_lib::Identity; -use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; #[cfg(test)] use spacetimedb_paths::server::SnapshotDirPath; +use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; use spacetimedb_primitives::*; use spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::raw_identifier::RawIdentifier; @@ -102,7 +102,7 @@ pub struct RelationalDB { inner: Locking, durability: Option>, - durability_runtime: Option, + durability_runtime: Option, snapshot_worker: Option, row_count_fn: RowCountFn, @@ -1678,7 +1678,7 @@ pub async fn local_durability( replica_dir: ReplicaDir, snapshot_worker: Option<&SnapshotWorker>, ) -> Result<(LocalDurability, DiskSizeFn), DBError> { - let runtime = RuntimeDispatch::tokio_current(); + let runtime = Runtime::tokio_current(); let on_new_segment = snapshot_worker.map(|snapshot_worker| { let snapshot_worker = snapshot_worker.clone(); Arc::new(move || { @@ -1957,14 +1957,13 @@ pub mod tests_utils { ) -> Result<(RelationalDB, Arc>), DBError> { let snapshots = want_snapshot_repo .then(|| { - open_snapshot_repo(root.snapshots(), db_identity, replica_id) - .map(|repo| { - SnapshotWorker::new_with_repository( - repo, - snapshot::Compression::Disabled, - RuntimeDispatch::tokio(rt.clone()), - ) - }) + open_snapshot_repo(root.snapshots(), db_identity, replica_id).map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Disabled, + Runtime::tokio(rt.clone()), + ) + }) }) .transpose()?; @@ -1975,7 +1974,7 @@ pub mod tests_utils { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: RuntimeDispatch::tokio(rt), + runtime: Runtime::tokio(rt), }; let (db, _) = RelationalDB::open( @@ -2086,14 +2085,13 @@ pub mod tests_utils { ) 
-> Result<(RelationalDB, Arc>), DBError> { let snapshots = want_snapshot_repo .then(|| { - open_snapshot_repo(root.snapshots(), Identity::ZERO, 0) - .map(|repo| { - SnapshotWorker::new_with_repository( - repo, - snapshot::Compression::Disabled, - RuntimeDispatch::tokio(rt.clone()), - ) - }) + open_snapshot_repo(root.snapshots(), Identity::ZERO, 0).map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Disabled, + Runtime::tokio(rt.clone()), + ) + }) }) .transpose()?; let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), snapshots.as_ref()))?; @@ -2102,7 +2100,7 @@ pub mod tests_utils { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: RuntimeDispatch::tokio(rt), + runtime: Runtime::tokio(rt), }; let db = Self::open_db(history, Some(persistence), None, 0)?; diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index dda981a89bd..042b257b608 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -17,7 +17,7 @@ use spacetimedb_lib::Identity; use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo, SnapshotRepository}; use tokio::sync::watch; -use crate::{runtime::RuntimeDispatch, worker_metrics::WORKER_METRICS}; +use crate::{runtime::Runtime, worker_metrics::WORKER_METRICS}; pub type SnapshotDatabaseState = Arc>; @@ -69,7 +69,7 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. 
- pub fn new(snapshot_repo: Arc, runtime: RuntimeDispatch) -> Self { + pub fn new(snapshot_repo: Arc, runtime: Runtime) -> Self { let database = snapshot_repo.database_identity(); let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); @@ -142,7 +142,7 @@ impl SnapshotWorker { pub fn new_with_repository( snapshot_repository: Arc, compression: Compression, - runtime: RuntimeDispatch, + runtime: Runtime, ) -> Self { let database = snapshot_repository.database_identity(); let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); @@ -200,7 +200,7 @@ struct SnapshotWorkerActor { snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, - runtime: RuntimeDispatch, + runtime: Runtime, compression: Option, } @@ -345,7 +345,7 @@ struct Compressor { snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, - runtime: RuntimeDispatch, + runtime: Runtime, } impl Compressor { @@ -378,15 +378,16 @@ impl Compressor { let mut stats = self.stats.take().unwrap_or_default(); let runtime = self.runtime.clone(); - let (mut stats, res) = runtime.spawn_blocking({ - let range = range.clone(); - move || { - let _timer = inner_timer.start_timer(); - let res = snapshot_repo.compress_snapshots(&mut stats, range); - (stats, res) - } - }) - .await; + let (mut stats, res) = runtime + .spawn_blocking({ + let range = range.clone(); + move || { + let _timer = inner_timer.start_timer(); + let res = snapshot_repo.compress_snapshots(&mut stats, range); + (stats, res) + } + }) + .await; let elapsed = Duration::from_secs_f64(timer.stop_and_record()); self.metrics.report_and_reset(&mut stats); // Store stats for reuse. diff --git a/crates/core/src/runtime.rs b/crates/core/src/runtime.rs index 53baad4d73a..4c55c71dccd 100644 --- a/crates/core/src/runtime.rs +++ b/crates/core/src/runtime.rs @@ -1,3 +1,4 @@ //! Runtime boundary re-exported for core call sites. 
-pub use spacetimedb_runtime::{current_handle_or_new_runtime, Handle, Runtime, RuntimeDispatch, RuntimeTimeout}; +pub use spacetimedb_runtime::{current_handle_or_new_runtime, TokioHandle, TokioRuntime}; +pub use spacetimedb_runtime::{Runtime, RuntimeTimeout}; diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index f82d36286d4..2332782a52d 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2061,7 +2061,7 @@ mod tests { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), snapshots: None, - runtime: crate::runtime::RuntimeDispatch::tokio(rt), + runtime: crate::runtime::Runtime::tokio(rt), }), None, 0, diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 51d89e2e848..f59df89c920 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -18,7 +18,7 @@ use spacetimedb_commitlog::{ }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; -use spacetimedb_runtime::RuntimeDispatch; +use spacetimedb_runtime::Runtime; use thiserror::Error; use tokio::sync::{oneshot, watch}; use tracing::{instrument, Span}; @@ -122,7 +122,7 @@ impl Local { /// This is used to capture a snapshot each new segment. pub fn open( replica_dir: ReplicaDir, - runtime: RuntimeDispatch, + runtime: Runtime, opts: Options, on_new_segment: Option>, ) -> Result { @@ -148,7 +148,7 @@ where { fn open_inner( clog: Arc, R>>, - runtime: RuntimeDispatch, + runtime: Runtime, opts: Options, lock: Option, ) -> Result { @@ -190,7 +190,7 @@ where R: RepoWithoutLockFile + Send + Sync + 'static, { /// Create a [`Local`] instance backed by the provided commitlog repo. 
- pub fn open_with_repo(repo: R, runtime: RuntimeDispatch, opts: Options) -> Result { + pub fn open_with_repo(repo: R, runtime: Runtime, opts: Options) -> Result { info!("open local durability"); let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); Self::open_inner(clog, runtime, opts, None) @@ -241,7 +241,7 @@ where queue_depth: Arc, batch_capacity: NonZeroUsize, - runtime: RuntimeDispatch, + runtime: Runtime, _lock: Option, } @@ -277,14 +277,15 @@ where let ready_len = tx_buf.len(); self.queue_depth.fetch_sub(ready_len as u64, Relaxed); let runtime = self.runtime.clone(); - tx_buf = runtime.spawn_blocking(move || -> io::Result>>> { - for tx in tx_buf.drain(..) { - clog.commit([tx.into_transaction()])?; - } - Ok(tx_buf) - }) - .await - .expect("commitlog write failed"); + tx_buf = runtime + .spawn_blocking(move || -> io::Result>>> { + for tx in tx_buf.drain(..) { + clog.commit([tx.into_transaction()])?; + } + Ok(tx_buf) + }) + .await + .expect("commitlog write failed"); if self.flush_and_sync().await.is_err() { sync_on_exit = false; break; @@ -317,19 +318,19 @@ where let runtime = self.runtime.clone(); runtime .spawn_blocking(move || { - let _span = span.enter(); - clog.flush_and_sync() - }) - .await - .inspect_err(|e| warn!("error flushing commitlog: {e:#}")) - .inspect(|maybe_offset| { - if let Some(new_offset) = maybe_offset { - trace!("synced to offset {new_offset}"); - self.durable_offset.send_modify(|val| { - val.replace(*new_offset); - }); - } - }) + let _span = span.enter(); + clog.flush_and_sync() + }) + .await + .inspect_err(|e| warn!("error flushing commitlog: {e:#}")) + .inspect(|maybe_offset| { + if let Some(new_offset) = maybe_offset { + trace!("synced to offset {new_offset}"); + self.durable_offset.send_modify(|val| { + val.replace(*new_offset); + }); + } + }) } } diff --git a/crates/durability/tests/io/fallocate.rs b/crates/durability/tests/io/fallocate.rs index be5ee61bc0b..2783b2178ec 100644 --- 
a/crates/durability/tests/io/fallocate.rs +++ b/crates/durability/tests/io/fallocate.rs @@ -161,7 +161,7 @@ async fn local_durability( ) -> Result, spacetimedb_durability::local::OpenError> { spacetimedb_durability::Local::open( dir, - spacetimedb_runtime::RuntimeDispatch::tokio_current(), + spacetimedb_runtime::Runtime::tokio_current(), spacetimedb_durability::local::Options { commitlog: spacetimedb_commitlog::Options { max_segment_size, diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index 6f62e0e6b08..a86ee9d0fc4 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -10,15 +10,16 @@ rust-version.workspace = true workspace = true [dependencies] -anyhow.workspace = true futures.workspace = true futures-util.workspace = true tokio = { workspace = true, optional = true } async-task = { version = "4.4", optional = true } +spin = { version = "0.9", default-features = false, features = ["mutex", "spin_mutex"], optional = true } libc = { version = "0.2", optional = true } tracing = { workspace = true, optional = true } [features] default = ["tokio"] tokio = ["dep:tokio"] -simulation = ["dep:async-task", "dep:libc", "dep:tracing"] +simulation = ["dep:async-task", "dep:spin"] +simulation-std = ["simulation", "dep:libc", "dep:tracing"] diff --git a/crates/runtime/README.md b/crates/runtime/README.md index f26134ba7bd..576122be42e 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -1,182 +1,127 @@ # spacetimedb-runtime -`spacetimedb-runtime` is the small runtime abstraction layer shared by core -code and DST. It exists for one reason: code such as durability and -snapshotting needs to spawn work, run blocking sections, and wait with -timeouts, but we want that same code to run on either: - -- real Tokio in production, or -- the deterministic DST simulator in tests. - -The crate keeps that boundary narrow. Most callers should depend on -`RuntimeDispatch` instead of reaching directly for Tokio or simulator internals. 
- -## Top-level API - -The top-level module in [src/lib.rs](./src/lib.rs) exposes: - -- `RuntimeDispatch` - A small tagged runtime handle with two backends: - - `Tokio(tokio::runtime::Handle)` when the `tokio` feature is enabled - - `Simulation(sim::Handle)` when the `simulation` feature is enabled -- `spawn(...)` - Fire-and-forget task spawning. -- `spawn_blocking(...)` - Run blocking work on the runtime-appropriate backend. - On Tokio this uses `tokio::task::spawn_blocking`. - In simulation this is still scheduled through the simulator so ordering stays - deterministic. -- `timeout(...)` - Runtime-relative timeout handling. - On Tokio this uses `tokio::time::timeout`. - In simulation this uses virtual time from `sim::time`. -- `current_handle_or_new_runtime()` - Tokio convenience for production code that may or may not already be inside a - Tokio runtime. - -The design goal is intentionally modest: this crate is not a general async -framework. It is a compatibility layer for the small set of runtime operations -SpacetimeDB core code actually needs. - -## Features - -The crate has two independent backends: +`spacetimedb-runtime` is the runtime boundary shared by SpacetimeDB core code +and DST. The goal is not to emulate all of Tokio. We do not aim to support +`tokio::net`, `tokio::fs`, or arbitrary ecosystem compatibility here. The goal +is much narrower: provide the small amount of execution control that core +database code needs so that it can run under either a deterministic single- +threaded runtime or a hosted adapter. -- `tokio` - Enables production runtime support and is part of the default feature set. -- `simulation` - Enables the deterministic local simulation runtime used by DST. - -Code can compile with one or both features enabled. `RuntimeDispatch` exposes -only the backends that were actually compiled in. - -## Simulation Modules - -The simulation backend lives under [src/sim](./src/sim). 
- -### `sim::mod` - -[src/sim/mod.rs](./src/sim/mod.rs) is the façade for the deterministic runtime. -It re-exports the main executor types and keeps the public surface small: - -- `Runtime` - Owns the simulator executor. -- `Handle` - Cloneable access to that executor from spawned tasks. -- `NodeId` - Logical node identifier used to group and pause/resume work. -- `JoinHandle` - Awaitable handle for spawned simulated tasks. -- `yield_now` - Cooperative yield point inside the simulator. -- `time` - Virtual time utilities. -- `Rng` and `DecisionSource` - Deterministic randomness primitives. - -It also exposes small helpers such as `advance_time(...)` and -`decision_source(...)`. - -### `sim::executor` - -[src/sim/executor.rs](./src/sim/executor.rs) is the heart of the simulator. +The crate is intentionally hybrid. Some parts of the process are naturally +Tokio-owned today, especially networking, subscriptions, and other integration- +heavy infrastructure. DST and selected core/database paths need a different +model: single-threaded, deterministic scheduling, explicit time, and a runtime +that can move toward `no_std + alloc`. This crate exists to support both +execution domains without forcing the whole process onto one scheduler. -It provides a single-threaded async executor adapted from madsim's task loop: +## Architecture -- tasks are stored as `async_task` runnables -- ready work is chosen by a deterministic RNG instead of an OS/runtime scheduler -- node state can be paused and resumed -- a thread-local handle context makes the current simulation runtime accessible - from inside spawned work -- determinism can be checked by replaying the same future twice and comparing - the sequence of scheduler decisions +The top-level type in [src/lib.rs](./src/lib.rs) is `Runtime`. It is the small +facade that shared core code should depend on. `Runtime` is not the simulator +itself and it is not Tokio. 
It is a tagged handle with the backends that matter +to SpacetimeDB: -Important behavior: +- `Runtime::Tokio(TokioHandle)` when the `tokio` feature is enabled +- `Runtime::Simulation(sim::Handle)` when the `simulation` feature is enabled -- `Runtime::block_on(...)` drives the whole simulation -- `Handle::spawn_on(...)` schedules work onto a logical node -- absence of runnable work and absence of future timer wakeups is treated as a - hang, which is exactly what DST wants +Code such as durability and snapshotting should accept or store `Runtime` and +use only the narrow operations exposed there: `spawn`, `spawn_blocking`, and +`timeout`. That keeps shared logic independent of the hosted runtime choice. -This module is the reason `RuntimeDispatch::Simulation` can behave like a real -runtime without giving up reproducibility. +Under that facade, this crate has two layers. -### `sim::time` +The first layer is the simulation core under [src/sim](./src/sim). This is the +deterministic single-thread runtime used by DST. The long-term direction for +this layer is `no_std + alloc`, explicit handles, explicit time, and no +dependency on ambient host facilities. -[src/sim/time.rs](./src/sim/time.rs) implements virtual time. +The second layer is the hosted adapter layer under [src/adapter](./src/adapter). +Today that includes a Tokio adapter and std-hosted simulation conveniences. The +Tokio adapter exists because some production and testing paths still need a real +process runtime. The std-hosted simulation helpers exist because determinism +testing, thread-local convenience APIs, and Unix hooks are useful in hosted +environments even though they are not part of the portable simulation core. -It provides: +## Feature Model -- `now()` - Current simulated time. -- `sleep(duration)` - A future that completes when simulated time reaches the deadline. -- `timeout(duration, future)` - Race a future against simulated time. -- `advance(duration)` - Move time forward explicitly. 
+The crate is organized around features that reflect that layering. -Internally it maintains: - -- a current `Duration` -- timer registrations keyed by deadline -- wakeups for due timers - -The executor uses this module to move time only when necessary, which keeps -tests deterministic and avoids tying correctness to wall-clock behavior. - -### `sim::rng` - -[src/sim/rng.rs](./src/sim/rng.rs) provides deterministic randomness. - -There are two layers: - -- `Rng` - Stateful deterministic RNG used by the executor and runtime internals. -- `DecisionSource` - Small lock-free source for probabilistic choices in test/workload code. - -This module also does two extra jobs: - -- records and checks determinism checkpoints so repeated seeded runs can prove - they took the same execution path -- hooks libc randomness calls such as `getrandom` so code running inside the - simulator sees deterministic randomness instead of ambient system entropy +- `simulation` + Enables the deterministic simulation runtime core. This is the part that is + intended to move toward `no_std + alloc`. +- `simulation-std` + Enables std-hosted conveniences layered on top of `simulation`, such as + thread-local current-handle access, determinism replay helpers, and host OS + integration hooks used by DST in a normal process. +- `tokio` + Enables the Tokio-backed hosted adapter and remains part of the default + feature set for now. +- `std` + Enables hosted-only functionality shared by the adapter layer. -That second point matters because reproducibility falls apart quickly if a -dependency reads randomness outside the simulator's control. +This means “simulation” is not shorthand for “all simulation tooling.” It is +the portable runtime core. Hosted extras live behind `simulation-std`. -### `sim::system_thread` +## Simulation Core -[src/sim/system_thread.rs](./src/sim/system_thread.rs) prevents accidental OS -thread creation while running under simulation. 
+The simulation core lives under [src/sim](./src/sim). -On Unix it intercepts `pthread_attr_init` and fails fast if code tries to spawn -real system threads from inside the simulator. That protects determinism and -enforces the intended execution model: simulated tasks should run on the -simulator, not escape onto real threads. +[src/sim/executor.rs](./src/sim/executor.rs) contains the single-threaded +deterministic executor. It stores ready tasks as `async_task` runnables, uses a +deterministic RNG to choose the next runnable, supports pause/resume by logical +node, and treats “no runnable work and no future timer wakeups” as a hang. -## How This Crate Is Intended To Be Used +[src/sim/time.rs](./src/sim/time.rs) contains virtual time. It owns simulated +time state, timer registration, and timeout behavior. The key property is that +time moves only under runtime control, not wall clock control. -For core code: +[src/sim/rng.rs](./src/sim/rng.rs) contains deterministic randomness. The +runtime uses this for scheduler choices, and test/workload code can use +`DecisionSource` when it needs deterministic probabilistic decisions. -- accept or store `RuntimeDispatch` -- use `spawn`, `spawn_blocking`, and `timeout` -- avoid embedding raw Tokio assumptions into shared logic +The public simulation surface is intentionally explicit: `sim::Runtime`, +`sim::Handle`, `sim::NodeId`, `sim::JoinHandle`, `yield_now`, and the virtual +time and RNG utilities. The portable direction is to make explicit-handle APIs +the main interface, with host-style convenience APIs layered separately. -For production-only code: +## Adapter Layer -- use `RuntimeDispatch::tokio_current()` or `RuntimeDispatch::tokio(handle)` +The adapter layer lives under [src/adapter](./src/adapter). -For DST: +[src/adapter/tokio.rs](./src/adapter/tokio.rs) is the Tokio facade. 
It defines +the hosted Tokio types used by the top-level runtime facade and provides +`current_handle_or_new_runtime()` for production code that may or may not +already be inside a Tokio runtime. -- create `sim::Runtime` -- run the test harness with `Runtime::block_on(...)` -- pass `RuntimeDispatch::simulation_current()` into the code under test +Std-hosted simulation helpers stay outside the simulation core as well. These +helpers are valuable, but they are adapters around the core, not the core +itself. Examples include thread-local “current runtime” access, determinism +replay helpers, and Unix hooks that prevent simulation from silently escaping +onto real OS threads. ## Current Scope -This crate is intentionally narrow. It is not trying to replace Tokio, and it -is not a generic distributed simulator. It currently provides exactly the -runtime seams needed by SpacetimeDB components that must run both in production -and under deterministic simulation. +This crate is not trying to make the whole of core `no_std` immediately. For +now, crates such as `relational_db`, `snapshot`, `commitlog`, and `datastore` +may still use `tokio::sync` internally. That is acceptable in the short term, +because those synchronization primitives are runtime-agnostic enough for DST and +the current runtime boundary effort is focused on execution control, not total +removal of Tokio-adjacent types from core. + +The longer-term goal is to reduce those dependencies where it materially helps +portability or determinism, but that work is explicitly out of scope for the +first phase of this crate architecture. + +## Intended Usage + +Shared core/database code should depend on `Runtime`, not on raw Tokio handles +or simulator internals. DST should construct `sim::Runtime` directly and use it +to drive deterministic test execution. 
Hosted production/testing code that still +needs Tokio should use the Tokio adapter through `Runtime::tokio(...)`, +`Runtime::tokio_current()`, and `current_handle_or_new_runtime()`. + +The likely end state is still hybrid: core/database execution may eventually run +on the same deterministic single-thread runtime in both DST and selected +production paths, while networking, clients, subscriptions, and other hosted +subsystems continue to live on Tokio. That is a deliberate design choice, not a +temporary inconsistency. diff --git a/crates/runtime/src/adapter/mod.rs b/crates/runtime/src/adapter/mod.rs new file mode 100644 index 00000000000..a254877d883 --- /dev/null +++ b/crates/runtime/src/adapter/mod.rs @@ -0,0 +1,5 @@ +#[cfg(feature = "tokio")] +pub mod tokio; + +#[cfg(feature = "simulation-std")] +pub mod sim_std; diff --git a/crates/runtime/src/adapter/sim_std.rs b/crates/runtime/src/adapter/sim_std.rs new file mode 100644 index 00000000000..2eaa160adec --- /dev/null +++ b/crates/runtime/src/adapter/sim_std.rs @@ -0,0 +1,361 @@ +use alloc::boxed::Box; +use core::{ + cell::{Cell, RefCell}, + future::Future, + ptr, + time::Duration, +}; +use std::sync::OnceLock; + +use crate::sim; + +thread_local! 
{ + static CURRENT_HANDLE: RefCell> = const { RefCell::new(None) }; + static CURRENT_RNG: RefCell> = const { RefCell::new(None) }; + static STD_RANDOM_SEED: Cell> = const { Cell::new(None) }; + static IN_SIMULATION: Cell = const { Cell::new(false) }; +} + +pub(crate) struct HandleContextGuard { + previous: Option, +} + +pub(crate) struct RngContextGuard { + previous: Option, +} + +pub(crate) struct SimulationThreadGuard { + previous: bool, +} + +pub fn simulation_current() -> crate::Runtime { + crate::Runtime::simulation(current_handle().expect("simulation runtime is not active on this thread")) +} + +pub fn block_on(runtime: &mut sim::Runtime, future: F) -> F::Output { + ensure_rng_hooks_linked(); + if !init_std_random_state(runtime.rng().seed()) { + tracing::warn!("failed to initialize std random state, std HashMap will not be deterministic"); + } + let _handle_context = enter_handle_context(runtime.handle()); + let _system_thread_context = enter_simulation_thread(); + let _rng_context = enter_rng_context(runtime.rng()); + runtime.block_on(future) +} + +pub fn current_handle() -> Option { + CURRENT_HANDLE.with(|handle| handle.borrow().clone()) +} + +pub fn advance_time(duration: Duration) { + current_handle() + .expect("simulation runtime is not active on this thread") + .advance(duration); +} + +pub fn now() -> Duration { + current_handle().map(|handle| handle.now()).unwrap_or_default() +} + +pub fn sleep(duration: Duration) -> sim::time::Sleep { + current_handle() + .expect("sim::time::sleep polled outside sim runtime") + .sleep(duration) +} + +pub async fn timeout(duration: Duration, future: impl Future) -> Result { + current_handle() + .expect("sim::time::timeout polled outside sim runtime") + .timeout(duration, future) + .await +} + +pub fn check_determinism(seed: u64, make_future: fn() -> F) -> F::Output +where + F: Future + 'static, + F::Output: Send + 'static, +{ + check_determinism_with(seed, make_future) +} + +pub fn check_determinism_with(seed: u64, 
make_future: M) -> F::Output +where + M: Fn() -> F + Clone + Send + 'static, + F: Future + 'static, + F::Output: Send + 'static, +{ + let first = make_future.clone(); + let log = std::thread::spawn(move || { + let mut runtime = sim::Runtime::new(seed); + runtime.enable_determinism_log(); + block_on(&mut runtime, first()); + runtime + .take_determinism_log() + .expect("determinism log should be enabled") + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap(); + + std::thread::spawn(move || { + let mut runtime = sim::Runtime::new(seed); + runtime.enable_determinism_check(log); + let output = block_on(&mut runtime, make_future()); + runtime.finish_determinism_check().unwrap_or_else(|err| panic!("{err}")); + output + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap() +} + +pub fn enable_buggify() { + current_handle() + .expect("simulation runtime is not active on this thread") + .enable_buggify(); +} + +pub fn disable_buggify() { + current_handle() + .expect("simulation runtime is not active on this thread") + .disable_buggify(); +} + +pub fn is_buggify_enabled() -> bool { + current_handle().is_some_and(|handle| handle.is_buggify_enabled()) +} + +pub fn buggify() -> bool { + current_handle() + .expect("simulation runtime is not active on this thread") + .buggify() +} + +pub fn buggify_with_prob(probability: f64) -> bool { + current_handle() + .expect("simulation runtime is not active on this thread") + .buggify_with_prob(probability) +} + +pub(crate) fn enter_handle_context(handle: sim::Handle) -> HandleContextGuard { + let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); + HandleContextGuard { previous } +} + +pub(crate) fn enter_simulation_thread() -> SimulationThreadGuard { + let previous = IN_SIMULATION.with(|state| state.replace(true)); + SimulationThreadGuard { previous } +} + +pub(crate) fn enter_rng_context(rng: sim::GlobalRng) -> RngContextGuard { + let previous = 
CURRENT_RNG.with(|current| current.replace(Some(rng))); + RngContextGuard { previous } +} + +fn in_simulation() -> bool { + IN_SIMULATION.with(Cell::get) +} + +fn init_std_random_state(seed: u64) -> bool { + STD_RANDOM_SEED.with(|slot| slot.set(Some(seed))); + let _ = std::collections::hash_map::RandomState::new(); + STD_RANDOM_SEED.with(|slot| slot.replace(None)).is_none() +} + +fn ensure_rng_hooks_linked() { + unsafe { + getentropy(ptr::null_mut(), 0); + } +} + +fn fill_from_seed(buf: *mut u8, buflen: usize, seed: u64) { + if buflen == 0 { + return; + } + let rng = sim::GlobalRng::new(seed); + let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; + rng.fill_bytes(buf); +} + +fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { + CURRENT_RNG.with(|current| { + let Some(rng) = current.borrow().clone() else { + return false; + }; + if buflen == 0 { + return true; + } + let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; + rng.fill_bytes(buf); + true + }) +} + +fn panic_with_seed(seed: u64, payload: Box) -> ! 
{ + eprintln!("note: run with --seed {seed} to reproduce this error"); + std::panic::resume_unwind(payload); +} + +impl Drop for HandleContextGuard { + fn drop(&mut self) { + CURRENT_HANDLE.with(|slot| { + *slot.borrow_mut() = self.previous.take(); + }); + } +} + +impl Drop for RngContextGuard { + fn drop(&mut self) { + CURRENT_RNG.with(|current| { + current.replace(self.previous.take()); + }); + } +} + +impl Drop for SimulationThreadGuard { + fn drop(&mut self) { + IN_SIMULATION.with(|state| { + state.set(self.previous); + }); + } +} + +#[cfg(unix)] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int { + if in_simulation() { + eprintln!("attempt to spawn a system thread in simulation."); + eprintln!("note: use simulator tasks instead."); + return -1; + } + + type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int; + static PTHREAD_ATTR_INIT: OnceLock = OnceLock::new(); + let original = PTHREAD_ATTR_INIT.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original pthread_attr_init"); + std::mem::transmute(ptr) + }); + unsafe { original(attr) } +} + +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { + #[cfg(target_os = "macos")] + let _ = flags; + + if let Some(seed) = STD_RANDOM_SEED.with(|slot| slot.replace(None)) { + fill_from_seed(buf, buflen, seed); + return buflen as isize; + } + if fill_from_current_rng(buf, buflen) { + return buflen as isize; + } + + #[cfg(target_os = "linux")] + { + type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize; + static GETRANDOM: OnceLock = OnceLock::new(); + let original = GETRANDOM.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getrandom"); 
+ std::mem::transmute(ptr) + }); + unsafe { original(buf, buflen, flags) } + } + + #[cfg(target_os = "macos")] + { + type GetentropyFn = unsafe extern "C" fn(*mut u8, usize) -> libc::c_int; + static GETENTROPY: OnceLock = OnceLock::new(); + let original = GETENTROPY.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getentropy".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getentropy"); + std::mem::transmute(ptr) + }); + match unsafe { original(buf, buflen) } { + -1 => -1, + 0 => buflen as isize, + _ => unreachable!("unexpected getentropy return value"), + } + } + + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + { + let _ = (buf, buflen, flags); + compile_error!("unsupported OS for DST getrandom override"); + } +} + +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { + if buflen > 256 { + return -1; + } + match unsafe { getrandom(buf, buflen, 0) } { + -1 => -1, + _ => 0, + } +} + +#[cfg(target_os = "macos")] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn CCRandomGenerateBytes(bytes: *mut u8, count: usize) -> i32 { + match unsafe { getrandom(bytes, count, 0) } { + -1 => -1, + _ => 0, + } +} + +#[cfg(test)] +mod tests { + use crate::sim; + + #[test] + #[cfg(unix)] + fn runtime_forbids_system_thread_spawn() { + let mut runtime = sim::Runtime::new(200); + runtime.block_on(async { + let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); + assert!(result.is_err()); + }); + } + + #[test] + fn getentropy_uses_current_sim_rng() { + let rng = sim::GlobalRng::new(20); + let _guard = enter_rng_context(rng.clone()); + + let mut actual = [0u8; 24]; + unsafe { + assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); + } + + let expected_rng = sim::GlobalRng::new(20); + let mut expected = [0u8; 24]; + expected_rng.fill_bytes(&mut expected); + assert_eq!(actual, expected); + } + + #[test] + fn 
std_hashmap_order_is_seeded_for_runtime_thread() { + fn order_for(seed: u64) -> Vec<(u64, u64)> { + std::thread::spawn(move || { + let _ = init_std_random_state(seed); + (0..12) + .map(|idx| (idx, idx)) + .collect::>() + .into_iter() + .collect() + }) + .join() + .unwrap() + } + + assert_eq!(order_for(30), order_for(30)); + } +} diff --git a/crates/runtime/src/adapter/tokio.rs b/crates/runtime/src/adapter/tokio.rs new file mode 100644 index 00000000000..5d605bba39a --- /dev/null +++ b/crates/runtime/src/adapter/tokio.rs @@ -0,0 +1,11 @@ +pub type TokioHandle = tokio::runtime::Handle; +pub type TokioRuntime = tokio::runtime::Runtime; + +pub fn current_handle_or_new_runtime() -> std::io::Result<(TokioHandle, Option)> { + if let Ok(handle) = TokioHandle::try_current() { + return Ok((handle, None)); + } + + let runtime = TokioRuntime::new()?; + Ok((runtime.handle().clone(), Some(runtime))) +} diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 7633ef08e40..8777409c402 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,19 +1,22 @@ +#![cfg_attr(not(any(feature = "tokio", feature = "simulation-std")), no_std)] + //! Runtime and deterministic simulation utilities shared by core and DST. 
-use std::{fmt, future::Future, time::Duration}; +extern crate alloc; + +use core::{fmt, future::Future, time::Duration}; +pub mod adapter; #[cfg(feature = "simulation")] pub mod sim; #[cfg(feature = "tokio")] -pub type Handle = tokio::runtime::Handle; -#[cfg(feature = "tokio")] -pub type Runtime = tokio::runtime::Runtime; +pub use adapter::tokio::{current_handle_or_new_runtime, TokioHandle, TokioRuntime}; #[derive(Clone)] -pub enum RuntimeDispatch { +pub enum Runtime { #[cfg(feature = "tokio")] - Tokio(Handle), + Tokio(TokioHandle), #[cfg(feature = "simulation")] Simulation(sim::Handle), } @@ -27,17 +30,18 @@ impl fmt::Display for RuntimeTimeout { } } +#[cfg(any(feature = "tokio", feature = "simulation-std"))] impl std::error::Error for RuntimeTimeout {} -impl RuntimeDispatch { +impl Runtime { #[cfg(feature = "tokio")] - pub fn tokio(handle: Handle) -> Self { + pub fn tokio(handle: TokioHandle) -> Self { Self::Tokio(handle) } #[cfg(feature = "tokio")] pub fn tokio_current() -> Self { - Self::tokio(Handle::current()) + Self::tokio(TokioHandle::current()) } #[cfg(feature = "simulation")] @@ -45,9 +49,9 @@ impl RuntimeDispatch { Self::Simulation(handle) } - #[cfg(feature = "simulation")] + #[cfg(feature = "simulation-std")] pub fn simulation_current() -> Self { - Self::simulation(sim::Handle::current().expect("simulation runtime is not active on this thread")) + adapter::sim_std::simulation_current() } pub fn spawn(&self, future: impl Future + Send + 'static) { @@ -102,21 +106,9 @@ impl RuntimeDispatch { .await .map_err(|_| RuntimeTimeout), #[cfg(feature = "simulation")] - Self::Simulation(_) => sim::time::timeout(timeout_after, future) - .await - .map_err(|_| RuntimeTimeout), + Self::Simulation(handle) => handle.timeout(timeout_after, future).await.map_err(|_| RuntimeTimeout), #[cfg(not(any(feature = "tokio", feature = "simulation")))] _ => unreachable!("runtime dispatch has no enabled backend"), } } } - -#[cfg(feature = "tokio")] -pub fn 
current_handle_or_new_runtime() -> anyhow::Result<(Handle, Option)> { - if let Ok(handle) = Handle::try_current() { - return Ok((handle, None)); - } - - let runtime = Runtime::new()?; - Ok((runtime.handle().clone(), Some(runtime))) -} diff --git a/crates/runtime/src/sim/buggify.rs b/crates/runtime/src/sim/buggify.rs new file mode 100644 index 00000000000..07188c6c207 --- /dev/null +++ b/crates/runtime/src/sim/buggify.rs @@ -0,0 +1,51 @@ +use crate::sim::Runtime; + +/// Probabilistic fault-injection helpers for simulation code. +/// +/// Reference: . +/// +/// Buggify is tied to a specific simulation runtime. Callers toggle it on that +/// runtime, then ask whether a fault should be injected at a particular point. +pub fn enable(runtime: &Runtime) { + runtime.enable_buggify(); +} + +/// Disable probabilistic fault injection for the given simulation runtime. +pub fn disable(runtime: &Runtime) { + runtime.disable_buggify(); +} + +/// Returns whether buggify is enabled for the given simulation runtime. +pub fn is_enabled(runtime: &Runtime) -> bool { + runtime.is_buggify_enabled() +} + +/// Returns whether the runtime should inject a fault at this point using the +/// default deterministic probability. +pub fn should_inject_fault(runtime: &Runtime) -> bool { + runtime.buggify() +} + +/// Returns whether the runtime should inject a fault at this point using the +/// provided deterministic probability. 
+pub fn should_inject_fault_with_prob(runtime: &Runtime, probability: f64) -> bool { + runtime.buggify_with_prob(probability) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn runtime_owned_buggify_controls_fault_injection() { + let runtime = Runtime::new(7); + + assert!(!is_enabled(&runtime)); + enable(&runtime); + assert!(is_enabled(&runtime)); + assert!(should_inject_fault_with_prob(&runtime, 1.0)); + disable(&runtime); + assert!(!is_enabled(&runtime)); + assert!(!should_inject_fault_with_prob(&runtime, 1.0)); + } +} diff --git a/crates/runtime/src/sim/config.rs b/crates/runtime/src/sim/config.rs new file mode 100644 index 00000000000..92ab8d0fdbc --- /dev/null +++ b/crates/runtime/src/sim/config.rs @@ -0,0 +1,16 @@ +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct RuntimeConfig { + pub seed: u64, +} + +impl RuntimeConfig { + pub const fn new(seed: u64) -> Self { + Self { seed } + } +} + +impl Default for RuntimeConfig { + fn default() -> Self { + Self::new(0) + } +} diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index 765b70f631b..e0a28afc4ba 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -1,29 +1,19 @@ //! Minimal asynchronous executor adapted from madsim's `sim/task` loop. 
-use std::{ - cell::RefCell, - collections::BTreeMap, +use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; +use core::{ fmt, future::Future, - panic::AssertUnwindSafe, pin::Pin, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, Mutex, - }, + sync::atomic::{AtomicBool, AtomicU64, Ordering}, task::{Context, Poll}, - thread::{self, Thread}, time::Duration, }; use futures_util::FutureExt; +use spin::Mutex; -use crate::sim::{ - rng::{enter_rng_context, DeterminismLog}, - system_thread::enter_simulation_thread, - time::{enter_time_context, TimeHandle}, - Rng, -}; +use crate::sim::{time::TimeHandle, Rng, RuntimeConfig}; type Runnable = async_task::Runnable; @@ -32,6 +22,7 @@ type Runnable = async_task::Runnable; pub struct NodeId(u64); impl NodeId { + /// The default node for single-node simulation or top-level runtime work. pub const MAIN: Self = Self(0); } @@ -51,39 +42,60 @@ pub struct Runtime { } impl Runtime { - pub fn new(seed: u64) -> anyhow::Result { - Ok(Self { - executor: Arc::new(Executor::new(seed)), - }) + /// Create a simulation runtime seeded for deterministic scheduling and RNG. + pub fn new(seed: u64) -> Self { + Self::with_config(RuntimeConfig::new(seed)) + } + + /// Create a simulation runtime from an explicit runtime configuration. + pub fn with_config(config: RuntimeConfig) -> Self { + Self { + executor: Arc::new(Executor::new(config)), + } } + /// Drive a top-level future to completion on the simulation executor. + /// + /// While the future runs, spawned tasks share the same deterministic + /// scheduler, timer wheel, and runtime RNG. pub fn block_on(&mut self, future: F) -> F::Output { - let _handle_context = enter_handle_context(self.handle()); self.executor.block_on(future) } + /// Return the amount of virtual time elapsed in this runtime. pub fn elapsed(&self) -> Duration { self.executor.elapsed() } + /// Get a cloneable handle for spawning tasks and accessing runtime services. 
pub fn handle(&self) -> Handle { Handle { executor: Arc::clone(&self.executor), } } + /// Create a new simulated node. + /// + /// Nodes are a scheduling/pausing boundary rather than separate executors: + /// all nodes still run on the same single-threaded runtime. pub fn create_node(&self) -> NodeId { self.handle().create_node() } + /// Pause scheduling for a node. + /// + /// Tasks already queued for the node are retained and will run only after + /// the node is resumed. pub fn pause(&self, node: NodeId) { self.handle().pause(node); } + /// Resume scheduling for a previously paused node. pub fn resume(&self, node: NodeId) { self.handle().resume(node); } + /// Spawn a `Send` future onto a specific simulated node. pub fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle where F: Future + Send + 'static, @@ -92,49 +104,53 @@ impl Runtime { self.handle().spawn_on(node, future) } - /// Run a future twice with the same seed and fail if simulator choices diverge. - pub fn check_determinism(seed: u64, make_future: fn() -> F) -> F::Output - where - F: Future + 'static, - F::Output: Send + 'static, - { - Self::check_determinism_with(seed, make_future) + pub fn enable_buggify(&self) { + self.executor.enable_buggify(); } - /// Run a future twice with the same seed and fail if simulator choices diverge. 
- pub fn check_determinism_with(seed: u64, make_future: M) -> F::Output - where - M: Fn() -> F + Clone + Send + 'static, - F: Future + 'static, - F::Output: Send + 'static, - { - let first = make_future.clone(); - let log = thread::spawn(move || { - let mut runtime = Runtime::new(seed).expect("failed to create simulation runtime"); - runtime.executor.enable_determinism_log(); - runtime.block_on(first()); - runtime - .executor - .take_determinism_log() - .expect("determinism log should be enabled") - }) - .join() - .map_err(|payload| panic_with_seed(seed, payload)) - .unwrap(); - - thread::spawn(move || { - let mut runtime = Runtime::new(seed).expect("failed to create simulation runtime"); - runtime.executor.enable_determinism_check(log); - let output = runtime.block_on(make_future()); - runtime - .executor - .finish_determinism_check() - .unwrap_or_else(|err| panic!("{err}")); - output - }) - .join() - .map_err(|payload| panic_with_seed(seed, payload)) - .unwrap() + /// Disable probabilistic fault injection for this runtime. + pub fn disable_buggify(&self) { + self.executor.disable_buggify(); + } + + /// Return whether buggify is enabled for this runtime. + pub fn is_buggify_enabled(&self) -> bool { + self.executor.is_buggify_enabled() + } + + /// Sample the default runtime buggify probability. + pub fn buggify(&self) -> bool { + self.executor.buggify() + } + + /// Sample a caller-provided runtime buggify probability. 
+ pub fn buggify_with_prob(&self, probability: f64) -> bool { + self.executor.buggify_with_prob(probability) + } + + #[allow(dead_code)] + pub(crate) fn enable_determinism_log(&self) { + self.executor.rng.enable_determinism_log(); + } + + #[allow(dead_code)] + pub(crate) fn enable_determinism_check(&self, log: crate::sim::DeterminismLog) { + self.executor.rng.enable_determinism_check(log); + } + + #[allow(dead_code)] + pub(crate) fn take_determinism_log(&self) -> Option { + self.executor.rng.take_determinism_log() + } + + #[allow(dead_code)] + pub(crate) fn finish_determinism_check(&self) -> Result<(), alloc::string::String> { + self.executor.rng.finish_determinism_check() + } + + #[allow(dead_code)] + pub(crate) fn rng(&self) -> Rng { + self.executor.rng.clone() } } @@ -145,22 +161,22 @@ pub struct Handle { } impl Handle { - pub fn current() -> Option { - current_handle() - } - + /// Create a new simulated node owned by this runtime. pub fn create_node(&self) -> NodeId { self.executor.create_node() } + /// Pause scheduling for a node. pub fn pause(&self, node: NodeId) { self.executor.pause(node); } + /// Resume scheduling for a node and requeue any buffered tasks for it. pub fn resume(&self, node: NodeId) { self.executor.resume(node); } + /// Spawn a `Send` future onto a specific simulated node. pub fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle where F: Future + Send + 'static, @@ -169,6 +185,9 @@ impl Handle { self.executor.spawn_on(node, future) } + /// Spawn a non-`Send` future onto a specific simulated node. + /// + /// This is only valid because the simulation executor is single-threaded. pub fn spawn_local_on(&self, node: NodeId, future: F) -> JoinHandle where F: Future + 'static, @@ -176,30 +195,53 @@ impl Handle { { self.executor.spawn_local_on(node, future) } -} -thread_local! { - static CURRENT_HANDLE: RefCell> = RefCell::new(None); -} + /// Return the current virtual time for this runtime. 
+ pub fn now(&self) -> Duration { + self.executor.time.now() + } -pub(crate) fn current_handle() -> Option { - CURRENT_HANDLE.with(|handle| handle.borrow().clone()) -} + /// Move virtual time forward explicitly. + pub fn advance(&self, duration: Duration) { + self.executor.time.advance(duration); + } -fn enter_handle_context(handle: Handle) -> HandleContextGuard { - let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); - HandleContextGuard { previous } -} + /// Create a future that becomes ready after `duration` of virtual time. + pub fn sleep(&self, duration: Duration) -> crate::sim::time::Sleep { + self.executor.time.sleep(duration) + } -struct HandleContextGuard { - previous: Option, -} + /// Race a future against a virtual-time timeout. + pub async fn timeout( + &self, + duration: Duration, + future: impl Future, + ) -> Result { + self.executor.time.timeout(duration, future).await + } -impl Drop for HandleContextGuard { - fn drop(&mut self) { - CURRENT_HANDLE.with(|slot| { - *slot.borrow_mut() = self.previous.take(); - }); + pub fn enable_buggify(&self) { + self.executor.enable_buggify(); + } + + /// Disable probabilistic fault injection for this runtime. + pub fn disable_buggify(&self) { + self.executor.disable_buggify(); + } + + /// Return whether buggify is enabled for this runtime. + pub fn is_buggify_enabled(&self) -> bool { + self.executor.is_buggify_enabled() + } + + /// Sample the default runtime buggify probability. + pub fn buggify(&self) -> bool { + self.executor.buggify() + } + + /// Sample a caller-provided runtime buggify probability. + pub fn buggify_with_prob(&self, probability: f64) -> bool { + self.executor.buggify_with_prob(probability) } } @@ -209,6 +251,7 @@ pub struct JoinHandle { } impl JoinHandle { + /// Detach the task so it continues running without awaiting its output. 
pub fn detach(self) { self.task.detach(); } @@ -222,31 +265,32 @@ impl Future for JoinHandle { } } -fn panic_with_seed(seed: u64, payload: Box) -> ! { - eprintln!("note: run with --seed {seed} to reproduce this error"); - std::panic::resume_unwind(payload); -} - +/// Core single-threaded scheduler backing a simulation [`Runtime`]. +/// +/// The executor owns the runnable queue, per-node pause state, deterministic +/// RNG, and virtual time. Tasks are selected from the queue using the runtime +/// RNG so the schedule is reproducible for a given seed. struct Executor { queue: Receiver, sender: Sender, - nodes: Mutex>>, - next_node: std::sync::atomic::AtomicU64, - rng: Arc>, + nodes: spin::Mutex>>, + next_node: AtomicU64, + rng: Rng, time: TimeHandle, } impl Executor { - fn new(seed: u64) -> Self { + /// Construct a fresh executor with one default `MAIN` node. + fn new(config: RuntimeConfig) -> Self { let queue = Queue::new(); let mut nodes = BTreeMap::new(); nodes.insert(NodeId::MAIN, Arc::new(NodeState::default())); Self { queue: queue.receiver(), sender: queue.sender(), - nodes: Mutex::new(nodes), - next_node: std::sync::atomic::AtomicU64::new(1), - rng: Arc::new(Mutex::new(Rng::new(seed))), + nodes: spin::Mutex::new(nodes), + next_node: AtomicU64::new(1), + rng: Rng::new(config.seed), time: TimeHandle::new(), } } @@ -255,45 +299,49 @@ impl Executor { self.time.now() } - fn enable_determinism_log(&self) { - self.rng.lock().expect("sim rng poisoned").enable_determinism_log(); + fn enable_buggify(&self) { + self.rng.enable_buggify(); } - fn enable_determinism_check(&self, log: DeterminismLog) { - self.rng.lock().expect("sim rng poisoned").enable_determinism_check(log); + fn disable_buggify(&self) { + self.rng.disable_buggify(); } - fn take_determinism_log(&self) -> Option { - self.rng.lock().expect("sim rng poisoned").take_determinism_log() + fn is_buggify_enabled(&self) -> bool { + self.rng.is_buggify_enabled() } - fn finish_determinism_check(&self) -> Result<(), 
String> { - self.rng.lock().expect("sim rng poisoned").finish_determinism_check() + fn buggify(&self) -> bool { + self.rng.buggify() + } + + fn buggify_with_prob(&self, probability: f64) -> bool { + self.rng.buggify_with_prob(probability) } fn create_node(&self) -> NodeId { let id = NodeId(self.next_node.fetch_add(1, Ordering::Relaxed)); - self.nodes - .lock() - .expect("nodes poisoned") - .insert(id, Arc::new(NodeState::default())); + self.nodes.lock().insert(id, Arc::new(NodeState::default())); id } + /// Mark a node as paused so newly selected runnables are buffered. fn pause(&self, node: NodeId) { self.node_state(node).paused.store(true, Ordering::Relaxed); } + /// Mark a node as runnable again and requeue any buffered tasks for it. fn resume(&self, node: NodeId) { let state = self.node_state(node); state.paused.store(false, Ordering::Relaxed); - let mut paused = state.paused_queue.lock().expect("paused queue poisoned"); + let mut paused = state.paused_queue.lock(); for runnable in paused.drain(..) { self.sender.send(runnable); } } + /// Spawn a `Send` task and enqueue its runnable on the shared runtime queue. fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle where F: Future + Send + 'static, @@ -310,6 +358,7 @@ impl Executor { JoinHandle { task } } + /// Spawn a non-`Send` task on the single-threaded runtime. fn spawn_local_on(&self, node: NodeId, future: F) -> JoinHandle where F: Future + 'static, @@ -329,12 +378,12 @@ impl Executor { } #[track_caller] + /// Run the top-level future until completion. + /// + /// The executor repeatedly drains runnable tasks, then advances virtual + /// time to the next timer when the queue is empty. If neither runnable work + /// nor timers remain, the simulation is considered deadlocked. 
fn block_on(&self, future: F) -> F::Output { - let _system_thread_context = enter_simulation_thread(); - let _rng_context = enter_rng_context(Arc::clone(&self.rng)); - let _time_context = enter_time_context(self.time.clone()); - let _waiter = WaiterGuard::new(&self.queue, thread::current()); - let sender = self.sender.clone(); let (runnable, task) = unsafe { async_task::Builder::new() @@ -357,41 +406,48 @@ impl Executor { } } + /// Drain the runnable queue, selecting tasks in deterministic RNG order. + /// + /// Paused-node tasks are diverted into that node's paused buffer instead of + /// being polled immediately. fn run_all_ready(&self) { while let Some(runnable) = self.queue.try_recv_random(&self.rng) { let node = *runnable.metadata(); let state = self.node_state(node); if state.paused.load(Ordering::Relaxed) { - state.paused_queue.lock().expect("paused queue poisoned").push(runnable); + state.paused_queue.lock().push(runnable); continue; } - let result = std::panic::catch_unwind(AssertUnwindSafe(|| runnable.run())); - if let Err(payload) = result { - std::panic::resume_unwind(payload); - } + runnable.run(); } } + /// Look up the scheduling state for a node, panicking if the node is unknown. fn node_state(&self, node: NodeId) -> Arc { self.nodes .lock() - .expect("nodes poisoned") .get(&node) .cloned() .unwrap_or_else(|| panic!("unknown simulated node {node}")) } } +/// Per-node scheduler state shared by tasks assigned to that node. #[derive(Clone, Default)] struct NodeState { paused: Arc, paused_queue: Arc>>, } +/// Yield back to the scheduler once. +/// +/// This is the smallest explicit interleaving point available to simulated +/// tasks when they need to give other runnables a chance to execute. pub async fn yield_now() { YieldNow { yielded: false }.await } +/// One-shot future backing [`yield_now`]. 
struct YieldNow { yielded: bool, } @@ -410,40 +466,26 @@ impl Future for YieldNow { } } -struct WaiterGuard<'a> { - receiver: &'a Receiver, -} - -impl<'a> WaiterGuard<'a> { - fn new(receiver: &'a Receiver, thread: Thread) -> Self { - receiver.set_waiter(Some(thread)); - Self { receiver } - } -} - -impl Drop for WaiterGuard<'_> { - fn drop(&mut self) { - self.receiver.set_waiter(None); - } -} - +/// Shared runnable queue used by the simulation executor. struct Queue { inner: Arc, } +/// Sending end of the runnable queue. #[derive(Clone)] struct Sender { inner: Arc, } +/// Receiving end of the runnable queue. #[derive(Clone)] struct Receiver { inner: Arc, } +/// Queue storage for runnables awaiting scheduling. struct QueueInner { queue: Mutex>, - waiter: Mutex>, } impl Queue { @@ -451,7 +493,6 @@ impl Queue { Self { inner: Arc::new(QueueInner { queue: Mutex::new(Vec::new()), - waiter: Mutex::new(None), }), } } @@ -470,25 +511,20 @@ impl Queue { } impl Sender { + /// Push a runnable onto the shared queue. fn send(&self, runnable: Runnable) { - self.inner.queue.lock().expect("run queue poisoned").push(runnable); - if let Some(thread) = self.inner.waiter.lock().expect("waiter poisoned").as_ref() { - thread.unpark(); - } + self.inner.queue.lock().push(runnable); } } impl Receiver { - fn set_waiter(&self, thread: Option) { - *self.inner.waiter.lock().expect("waiter poisoned") = thread; - } - - fn try_recv_random(&self, rng: &Mutex) -> Option { - let mut queue = self.inner.queue.lock().expect("run queue poisoned"); + /// Remove one runnable using the runtime RNG to choose among ready tasks. 
+ fn try_recv_random(&self, rng: &Rng) -> Option { + let mut queue = self.inner.queue.lock(); if queue.is_empty() { return None; } - let idx = rng.lock().expect("rng poisoned").index(queue.len()); + let idx = rng.index(queue.len()); Some(queue.swap_remove(idx)) } } @@ -501,10 +537,11 @@ mod tests { }; use super::*; + use crate::sim::RuntimeConfig; #[test] fn paused_node_does_not_run_until_resumed() { - let mut runtime = Runtime::new(1).unwrap(); + let mut runtime = Runtime::new(1); let node = runtime.create_node(); runtime.pause(node); @@ -527,7 +564,7 @@ mod tests { #[test] fn handle_can_spawn_onto_node_from_simulated_task() { - let mut runtime = Runtime::new(2).unwrap(); + let mut runtime = Runtime::new(2); let handle = runtime.handle(); let value = runtime.block_on(async move { @@ -538,13 +575,45 @@ mod tests { assert_eq!(value, 11); } + #[test] + fn runtime_config_sets_seed() { + let runtime = Runtime::with_config(RuntimeConfig::new(77)); + let handle = runtime.handle(); + handle.enable_buggify(); + + let actual = (0..8).map(|_| handle.buggify_with_prob(0.5)).collect::>(); + + let expected = { + let mut rng = Rng::new(77); + rng.enable_buggify(); + (0..8).map(|_| rng.buggify_with_prob(0.5)).collect::>() + }; + + assert_eq!(actual, expected); + } + + #[test] + fn runtime_and_handle_share_buggify_state() { + let runtime = Runtime::new(6); + let handle = runtime.handle(); + + assert!(!runtime.is_buggify_enabled()); + runtime.enable_buggify(); + assert!(handle.is_buggify_enabled()); + assert!(handle.buggify_with_prob(1.0)); + handle.disable_buggify(); + assert!(!runtime.is_buggify_enabled()); + } + + #[cfg(feature = "simulation-std")] #[test] fn current_handle_can_spawn_local_task_inside_runtime() { - assert!(Handle::current().is_none()); + assert!(crate::adapter::sim_std::current_handle().is_none()); - let mut runtime = Runtime::new(5).unwrap(); - let value = runtime.block_on(async { - let handle = Handle::current().expect("sim handle should be present inside 
block_on"); + let mut runtime = Runtime::new(5); + let value = crate::adapter::sim_std::block_on(&mut runtime, async { + let handle = + crate::adapter::sim_std::current_handle().expect("sim handle should be present inside block_on"); let node = handle.create_node(); let captured = std::rc::Rc::new(17); handle @@ -556,15 +625,16 @@ mod tests { }); assert_eq!(value, 17); - assert!(Handle::current().is_none()); + assert!(crate::adapter::sim_std::current_handle().is_none()); } + #[cfg(feature = "simulation-std")] #[test] fn check_determinism_runs_future_twice() { static CALLS: AtomicUsize = AtomicUsize::new(0); CALLS.store(0, Ordering::SeqCst); - let value = Runtime::check_determinism(3, || async { + let value = crate::adapter::sim_std::check_determinism(3, || async { CALLS.fetch_add(1, Ordering::SeqCst); yield_now().await; 13 @@ -574,13 +644,14 @@ mod tests { assert_eq!(CALLS.load(Ordering::SeqCst), 2); } + #[cfg(feature = "simulation-std")] #[test] #[should_panic(expected = "non-determinism detected")] fn check_determinism_rejects_different_scheduler_sequence() { static FIRST_RUN: AtomicBool = AtomicBool::new(true); FIRST_RUN.store(true, Ordering::SeqCst); - Runtime::check_determinism(4, || async { + crate::adapter::sim_std::check_determinism(4, || async { if FIRST_RUN.swap(false, Ordering::SeqCst) { yield_now().await; } diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs index 467903cf2b4..1b778f96d62 100644 --- a/crates/runtime/src/sim/mod.rs +++ b/crates/runtime/src/sim/mod.rs @@ -4,20 +4,13 @@ //! futures are scheduled as runnable tasks and the ready queue is sampled by a //! deterministic RNG instead of being driven by a package-level async runtime. 
+pub mod buggify; +mod config; mod executor; mod rng; -mod system_thread; pub mod time; -use std::time::Duration; - +pub use config::RuntimeConfig; pub use executor::{yield_now, Handle, JoinHandle, NodeId, Runtime}; -pub use rng::{DecisionSource, Rng}; - -pub fn advance_time(duration: Duration) { - time::advance(duration); -} - -pub fn decision_source(seed: u64) -> DecisionSource { - DecisionSource::new(seed) -} +pub(crate) use rng::DeterminismLog; +pub use rng::{GlobalRng, Rng}; diff --git a/crates/runtime/src/sim/rng.rs b/crates/runtime/src/sim/rng.rs index 09afde03031..602eae59979 100644 --- a/crates/runtime/src/sim/rng.rs +++ b/crates/runtime/src/sim/rng.rs @@ -1,135 +1,190 @@ -use std::{ - cell::{Cell, RefCell}, - ptr, - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, Mutex, OnceLock, - }, -}; +use alloc::{format, string::String}; +use alloc::{sync::Arc, vec::Vec}; +use spin::Mutex; -const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; +pub type Rng = GlobalRng; +/// Shared deterministic RNG for the simulation core. +/// +/// The simulator owns one runtime-wide RNG handle and uses it for scheduler +/// choices, probabilistic fault injection, and determinism checks. Hosted +/// conveniences such as thread-local current-RNG access and libc random hooks +/// live in `adapter::sim_std`, not here. #[derive(Clone, Debug)] -pub struct Rng { +pub struct GlobalRng { + inner: Arc>, +} + +#[derive(Debug)] +struct Inner { + /// Seed used to initialize the runtime RNG, carried for diagnostics and replay. seed: u64, - state: u64, + /// Deterministic generator used for scheduler choices and fault injection decisions. + rng: SplitMix64, + /// Checkpoints recorded during the first determinism run. log: Option>, + /// Expected checkpoints plus the number already consumed during replay. check: Option<(Vec, usize)>, + /// Whether probabilistic fault injection is currently enabled for this runtime. 
+ buggify_enabled: bool, } -impl Rng { - pub fn new(seed: u64) -> Self { - unsafe { getentropy(ptr::null_mut(), 0) }; - if !init_std_random_state(seed) { - tracing::warn!("failed to initialize std random state, std HashMap will not be deterministic"); +const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; + +/// Reference for SplitMix64 algorithm: https://rosettacode.org/wiki/Pseudo-random_numbers/Splitmix64 +/// Splitmix64 is the default pseudo-random number generator algorithm. +/// It uses a fairly simple algorithm that, though it is considered +/// to be poor for cryptographic purposes, is very fast to calculate, +/// and is "good enough" for many random number needs. +/// It passes several fairly rigorous PRNG "fitness" tests that some more complex algorithms fail. +#[derive(Clone, Debug)] +struct SplitMix64 { + state: u64, +} + +impl SplitMix64 { + fn new(seed: u64) -> Self { + Self { state: seed } + } + + fn next_u64(&mut self) -> u64 { + self.state = self.state.wrapping_add(GAMMA); + mix64(self.state) + } + + fn fill_bytes(&mut self, dest: &mut [u8]) { + for chunk in dest.chunks_mut(core::mem::size_of::()) { + let bytes = self.next_u64().to_ne_bytes(); + chunk.copy_from_slice(&bytes[..chunk.len()]); } + } +} + +fn mix64(mut x: u64) -> u64 { + x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + x ^ (x >> 31) +} + +impl GlobalRng { + /// Create a new deterministic RNG for a simulation runtime. 
+ pub fn new(seed: u64) -> Self { Self { - seed, - state: splitmix64(seed), - log: None, - check: None, + inner: Arc::new(Mutex::new(Inner { + seed, + rng: SplitMix64::new(seed), + log: None, + check: None, + buggify_enabled: false, + })), } } - pub fn next_u64(&mut self) -> u64 { - self.state = self.state.wrapping_add(GAMMA); - let value = splitmix64(self.state); - self.record_checkpoint(value); - value + pub fn next_u64(&self) -> u64 { + self.with_inner(|inner| inner.rng.next_u64()) } - pub fn index(&mut self, len: usize) -> usize { + pub fn index(&self, len: usize) -> usize { assert!(len > 0, "len must be non-zero"); (self.next_u64() as usize) % len } - pub fn sample_probability(&mut self, probability: f64) -> bool { + pub fn sample_probability(&self, probability: f64) -> bool { probability_sample(self.next_u64(), probability) } - pub(crate) fn fill_bytes(&mut self, dest: &mut [u8]) { - for chunk in dest.chunks_mut(std::mem::size_of::()) { - let bytes = self.next_u64().to_ne_bytes(); - chunk.copy_from_slice(&bytes[..chunk.len()]); - } + pub fn enable_buggify(&self) { + self.inner.lock().buggify_enabled = true; } - pub(crate) fn enable_determinism_log(&mut self) { - self.log = Some(Vec::new()); - self.check = None; + pub fn disable_buggify(&self) { + self.inner.lock().buggify_enabled = false; } - pub(crate) fn enable_determinism_check(&mut self, log: DeterminismLog) { - self.check = Some((log.0, 0)); - self.log = None; + pub fn is_buggify_enabled(&self) -> bool { + self.inner.lock().buggify_enabled } - pub(crate) fn take_determinism_log(&mut self) -> Option { - self.log - .take() - .or_else(|| self.check.take().map(|(log, _)| log)) - .map(DeterminismLog) + pub fn buggify(&self) -> bool { + self.buggify_with_prob(0.25) } - pub(crate) fn finish_determinism_check(&self) -> Result<(), String> { - if let Some((log, consumed)) = &self.check - && *consumed != log.len() - { - return Err(format!( - "non-determinism detected for seed {}: consumed {consumed} of {} 
checkpoints", - self.seed, - log.len() - )); - } - Ok(()) + pub fn buggify_with_prob(&self, probability: f64) -> bool { + self.is_buggify_enabled() && self.sample_probability(probability) } - fn record_checkpoint(&mut self, value: u64) { - if self.log.is_none() && self.check.is_none() { - return; - } + #[allow(dead_code)] + pub(crate) fn seed(&self) -> u64 { + self.inner.lock().seed + } - let checkpoint = checksum(value); - if let Some(log) = &mut self.log { - log.push(checkpoint); - } - if let Some((expected, consumed)) = &mut self.check { - if expected.get(*consumed) != Some(&checkpoint) { - panic!( - "non-determinism detected for seed {} at checkpoint {consumed}", - self.seed - ); + fn with_inner(&self, f: impl FnOnce(&mut Inner) -> T) -> T { + let mut inner = self.inner.lock(); + let output = f(&mut inner); + if inner.log.is_some() || inner.check.is_some() { + let checkpoint = checksum(inner.rng.clone().next_u64()); + if let Some(log) = &mut inner.log { + log.push(checkpoint); + } + let seed = inner.seed; + if let Some((expected, consumed)) = &mut inner.check { + if expected.get(*consumed) != Some(&checkpoint) { + panic!("non-determinism detected for seed {} at checkpoint {consumed}", seed); + } + *consumed += 1; } - *consumed += 1; } + output } -} -#[derive(Debug, Clone, Eq, PartialEq)] -pub(crate) struct DeterminismLog(Vec); + #[allow(dead_code)] + pub(crate) fn fill_bytes(&self, dest: &mut [u8]) { + self.with_inner(|inner| inner.rng.fill_bytes(dest)); + } -#[derive(Debug)] -pub struct DecisionSource { - state: AtomicU64, -} + #[allow(dead_code)] + pub(crate) fn enable_determinism_log(&self) { + let mut inner = self.inner.lock(); + inner.log = Some(Vec::new()); + inner.check = None; + } -impl DecisionSource { - pub fn new(seed: u64) -> Self { - Self { - state: AtomicU64::new(splitmix64(seed)), - } + #[allow(dead_code)] + pub(crate) fn enable_determinism_check(&self, log: DeterminismLog) { + let mut inner = self.inner.lock(); + inner.check = Some((log.0, 0)); 
+ inner.log = None; } - pub fn sample_probability(&self, probability: f64) -> bool { - probability_sample(self.next_u64(), probability) + #[allow(dead_code)] + pub(crate) fn take_determinism_log(&self) -> Option { + let mut inner = self.inner.lock(); + inner + .log + .take() + .or_else(|| inner.check.take().map(|(log, _)| log)) + .map(DeterminismLog) } - fn next_u64(&self) -> u64 { - let state = self.state.fetch_add(GAMMA, Ordering::Relaxed).wrapping_add(GAMMA); - splitmix64(state) + #[allow(dead_code)] + pub(crate) fn finish_determinism_check(&self) -> Result<(), String> { + let inner = self.inner.lock(); + if let Some((log, consumed)) = &inner.check { + if *consumed != log.len() { + return Err(format!( + "non-determinism detected for seed {}: consumed {consumed} of {} checkpoints", + inner.seed, + log.len() + )); + } + } + Ok(()) } } +#[derive(Debug, Clone, Eq, PartialEq)] +pub(crate) struct DeterminismLog(Vec); + fn probability_sample(value: u64, probability: f64) -> bool { if probability <= 0.0 { return false; @@ -138,169 +193,26 @@ fn probability_sample(value: u64, probability: f64) -> bool { return true; } - // Use the top 53 bits to build an exactly representable f64 in [0, 1). let unit = (value >> 11) as f64 * (1.0 / ((1u64 << 53) as f64)); unit < probability } -fn splitmix64(mut x: u64) -> u64 { - x = x.wrapping_add(GAMMA); - x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); - x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); - x ^ (x >> 31) -} - fn checksum(value: u64) -> u8 { value.to_ne_bytes().into_iter().fold(0, |acc, byte| acc ^ byte) } -thread_local! 
{ - static CURRENT_RNG: RefCell>>> = const { RefCell::new(None) }; - static STD_RANDOM_SEED: Cell> = const { Cell::new(None) }; -} - -pub(crate) struct RngContextGuard { - previous: Option>>, -} - -pub(crate) fn enter_rng_context(rng: Arc>) -> RngContextGuard { - let previous = CURRENT_RNG.with(|current| current.replace(Some(rng))); - RngContextGuard { previous } -} - -impl Drop for RngContextGuard { - fn drop(&mut self) { - CURRENT_RNG.with(|current| { - current.replace(self.previous.take()); - }); - } -} - -fn init_std_random_state(seed: u64) -> bool { - STD_RANDOM_SEED.with(|slot| slot.set(Some(seed))); - let _ = std::collections::hash_map::RandomState::new(); - STD_RANDOM_SEED.with(|slot| slot.replace(None)).is_none() -} - -fn fill_from_seed(buf: *mut u8, buflen: usize, seed: u64) { - if buflen == 0 { - return; - } - let mut state = splitmix64(seed); - let buf = unsafe { std::slice::from_raw_parts_mut(buf, buflen) }; - for chunk in buf.chunks_mut(std::mem::size_of::()) { - state = state.wrapping_add(GAMMA); - let bytes = splitmix64(state).to_ne_bytes(); - chunk.copy_from_slice(&bytes[..chunk.len()]); - } -} - -fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { - CURRENT_RNG.with(|current| { - let Some(rng) = current.borrow().clone() else { - return false; - }; - if buflen == 0 { - return true; - } - let buf = unsafe { std::slice::from_raw_parts_mut(buf, buflen) }; - rng.lock().expect("sim rng poisoned").fill_bytes(buf); - true - }) -} - -/// Obtain random bytes through the simulation RNG when running inside the DST executor. -/// -/// This mirrors madsim's libc-level hook. It covers libc users and macOS -/// `CCRandomGenerateBytes`; crates that issue raw kernel syscalls can still -/// bypass it. 
-#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { - #[cfg(target_os = "macos")] - let _ = flags; - - if let Some(seed) = STD_RANDOM_SEED.with(|slot| slot.replace(None)) { - fill_from_seed(buf, buflen, seed); - return buflen as isize; - } - if fill_from_current_rng(buf, buflen) { - return buflen as isize; - } - - #[cfg(target_os = "linux")] - { - type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize; - static GETRANDOM: OnceLock = OnceLock::new(); - let original = GETRANDOM.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original getrandom"); - std::mem::transmute(ptr) - }); - unsafe { original(buf, buflen, flags) } - } - - #[cfg(target_os = "macos")] - { - type GetentropyFn = unsafe extern "C" fn(*mut u8, usize) -> libc::c_int; - static GETENTROPY: OnceLock = OnceLock::new(); - let original = GETENTROPY.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"getentropy".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original getentropy"); - std::mem::transmute(ptr) - }); - match unsafe { original(buf, buflen) } { - -1 => -1, - 0 => buflen as isize, - _ => unreachable!("unexpected getentropy return value"), - } - } - - #[cfg(not(any(target_os = "linux", target_os = "macos")))] - { - let _ = (buf, buflen, flags); - compile_error!("unsupported OS for DST getrandom override"); - } -} - -/// Fill a buffer with random bytes through the same hook used by libc. -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { - if buflen > 256 { - return -1; - } - match unsafe { getrandom(buf, buflen, 0) } { - -1 => -1, - _ => 0, - } -} - -/// macOS uses CommonCrypto for process randomness in newer Rust toolchains. 
-#[cfg(target_os = "macos")] -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn CCRandomGenerateBytes(bytes: *mut u8, count: usize) -> i32 { - match unsafe { getrandom(bytes, count, 0) } { - -1 => -1, - _ => 0, - } -} - #[cfg(test)] mod tests { - use std::{collections::HashMap, sync::Arc}; - use super::*; #[test] fn rng_log_check_accepts_same_sequence() { - let mut first = Rng::new(10); + let first = Rng::new(10); first.enable_determinism_log(); let first_values = (0..8).map(|_| first.next_u64()).collect::>(); let log = first.take_determinism_log().unwrap(); - let mut second = Rng::new(10); + let second = Rng::new(10); second.enable_determinism_check(log); let second_values = (0..8).map(|_| second.next_u64()).collect::>(); second.finish_determinism_check().unwrap(); @@ -309,59 +221,22 @@ mod tests { } #[test] - fn decision_source_matches_rng_sequence() { - let source = DecisionSource::new(12); - let mut rng = Rng::new(12); - - for _ in 0..16 { - assert_eq!(source.next_u64(), rng.next_u64()); - } - } - - #[test] - #[should_panic(expected = "non-determinism detected")] - fn rng_log_check_rejects_different_sequence() { - let mut first = Rng::new(10); - first.enable_determinism_log(); - first.next_u64(); - let log = first.take_determinism_log().unwrap(); - - let mut second = Rng::new(11); - second.enable_determinism_check(log); - second.next_u64(); - } - - #[test] - fn getentropy_uses_current_sim_rng() { - let rng = Arc::new(Mutex::new(Rng::new(20))); - let _guard = enter_rng_context(Arc::clone(&rng)); - - let mut actual = [0u8; 24]; - unsafe { - assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); + fn buggify_is_disabled_by_default() { + let rng = Rng::new(20); + for _ in 0..8 { + assert!(!rng.buggify()); + assert!(!rng.buggify_with_prob(1.0)); } - - let mut expected_rng = Rng::new(20); - let mut expected = [0u8; 24]; - expected_rng.fill_bytes(&mut expected); - assert_eq!(actual, expected); } #[test] - fn 
std_hashmap_order_is_seeded_for_runtime_thread() { - fn order_for(seed: u64) -> Vec<(u64, u64)> { - std::thread::spawn(move || { - let _rng = Rng::new(seed); - (0..12) - .map(|idx| (idx, idx)) - .collect::>() - .into_iter() - .collect() - }) - .join() - .unwrap() - } - - assert_eq!(order_for(30), order_for(30)); + fn buggify_obeys_enable_and_disable() { + let rng = Rng::new(21); + rng.enable_buggify(); + assert!(rng.is_buggify_enabled()); + assert!(rng.buggify_with_prob(1.0)); + rng.disable_buggify(); + assert!(!rng.is_buggify_enabled()); + assert!(!rng.buggify_with_prob(1.0)); } } diff --git a/crates/runtime/src/sim/system_thread.rs b/crates/runtime/src/sim/system_thread.rs deleted file mode 100644 index f395a25442a..00000000000 --- a/crates/runtime/src/sim/system_thread.rs +++ /dev/null @@ -1,64 +0,0 @@ -//! Guard against creating OS threads from inside the simulator. - -use std::{cell::Cell, sync::OnceLock}; - -thread_local! { - static IN_SIMULATION: Cell = const { Cell::new(false) }; -} - -pub(crate) struct SimulationThreadGuard { - previous: bool, -} - -pub(crate) fn enter_simulation_thread() -> SimulationThreadGuard { - let previous = IN_SIMULATION.with(|state| state.replace(true)); - SimulationThreadGuard { previous } -} - -impl Drop for SimulationThreadGuard { - fn drop(&mut self) { - IN_SIMULATION.with(|state| { - state.set(self.previous); - }); - } -} - -fn in_simulation() -> bool { - IN_SIMULATION.with(Cell::get) -} - -/// Forbid creating system threads in simulation. 
-#[cfg(unix)] -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int { - if in_simulation() { - eprintln!("attempt to spawn a system thread in simulation."); - eprintln!("note: use simulator tasks instead."); - return -1; - } - - type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int; - static PTHREAD_ATTR_INIT: OnceLock = OnceLock::new(); - let original = PTHREAD_ATTR_INIT.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original pthread_attr_init"); - std::mem::transmute(ptr) - }); - unsafe { original(attr) } -} - -#[cfg(test)] -mod tests { - use crate::sim; - - #[test] - #[cfg(unix)] - fn runtime_forbids_system_thread_spawn() { - let mut runtime = sim::Runtime::new(200).unwrap(); - runtime.block_on(async { - let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); - assert!(result.is_err()); - }); - } -} diff --git a/crates/runtime/src/sim/time.rs b/crates/runtime/src/sim/time.rs deleted file mode 100644 index 2508b35b249..00000000000 --- a/crates/runtime/src/sim/time.rs +++ /dev/null @@ -1,343 +0,0 @@ -//! Virtual time for the local simulation runtime. 
- -use std::{ - cell::RefCell, - collections::BTreeMap, - fmt, - future::Future, - pin::Pin, - sync::{Arc, Mutex}, - task::{Context, Poll, Waker}, - time::Duration, -}; - -use futures::future::{select, Either}; - -#[derive(Clone, Debug)] -pub struct TimeHandle { - inner: Arc>, -} - -impl TimeHandle { - pub fn new() -> Self { - Self { - inner: Arc::new(Mutex::new(TimeState::default())), - } - } - - pub fn now(&self) -> Duration { - self.inner.lock().expect("sim time poisoned").now - } - - pub fn advance(&self, duration: Duration) { - if duration.is_zero() { - return; - } - - let wakers = { - let mut state = self.inner.lock().expect("sim time poisoned"); - state.now = state.now.saturating_add(duration); - state.take_due_wakers() - }; - wake_all(wakers); - } - - pub fn wake_next_timer(&self) -> bool { - let wakers = { - let mut state = self.inner.lock().expect("sim time poisoned"); - let Some(next_deadline) = state.timers.values().map(|timer| timer.deadline).min() else { - return false; - }; - if next_deadline > state.now { - state.now = next_deadline; - } - state.take_due_wakers() - }; - let woke = !wakers.is_empty(); - wake_all(wakers); - woke - } - - fn register_timer(&self, id: TimerId, deadline: Duration, waker: &Waker) { - let mut state = self.inner.lock().expect("sim time poisoned"); - state.timers.insert( - id, - TimerEntry { - deadline, - waker: waker.clone(), - }, - ); - } - - fn cancel_timer(&self, id: TimerId) { - self.inner.lock().expect("sim time poisoned").timers.remove(&id); - } - - fn next_timer_id(&self) -> TimerId { - let mut state = self.inner.lock().expect("sim time poisoned"); - let id = TimerId(state.next_timer_id); - state.next_timer_id = state.next_timer_id.saturating_add(1); - id - } -} - -impl Default for TimeHandle { - fn default() -> Self { - Self::new() - } -} - -#[derive(Debug, Default)] -struct TimeState { - now: Duration, - next_timer_id: u64, - timers: BTreeMap, -} - -impl TimeState { - fn take_due_wakers(&mut self) -> Vec { - let due 
= self - .timers - .iter() - .filter_map(|(id, timer)| (timer.deadline <= self.now).then_some(*id)) - .collect::>(); - due.into_iter() - .filter_map(|id| self.timers.remove(&id).map(|timer| timer.waker)) - .collect() - } -} - -#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] -struct TimerId(u64); - -#[derive(Debug)] -struct TimerEntry { - deadline: Duration, - waker: Waker, -} - -thread_local! { - static CURRENT_TIME: RefCell> = const { RefCell::new(None) }; -} - -pub struct TimeContextGuard { - previous: Option, -} - -pub fn enter_time_context(handle: TimeHandle) -> TimeContextGuard { - let previous = CURRENT_TIME.with(|current| current.replace(Some(handle))); - TimeContextGuard { previous } -} - -pub fn try_current_handle() -> Option { - CURRENT_TIME.with(|current| current.borrow().clone()) -} - -pub fn now() -> Duration { - try_current_handle().map(|handle| handle.now()).unwrap_or_default() -} - -pub fn advance(duration: Duration) { - if let Some(handle) = try_current_handle() { - handle.advance(duration); - } -} - -pub fn sleep(duration: Duration) -> Sleep { - Sleep { - duration, - state: SleepState::Unregistered, - } -} - -pub async fn timeout(duration: Duration, future: impl Future) -> Result { - futures::pin_mut!(future); - let sleep = sleep(duration); - futures::pin_mut!(sleep); - - match select(future, sleep).await { - Either::Left((output, _)) => Ok(output), - Either::Right(((), _)) => Err(TimeoutElapsed { duration }), - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct TimeoutElapsed { - duration: Duration, -} - -impl TimeoutElapsed { - pub fn duration(self) -> Duration { - self.duration - } -} - -impl fmt::Display for TimeoutElapsed { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "simulated timeout elapsed after {:?}", self.duration) - } -} - -impl std::error::Error for TimeoutElapsed {} - -impl Drop for TimeContextGuard { - fn drop(&mut self) { - CURRENT_TIME.with(|current| { - 
current.replace(self.previous.take()); - }); - } -} - -pub struct Sleep { - duration: Duration, - state: SleepState, -} - -enum SleepState { - Unregistered, - Registered { - handle: TimeHandle, - id: TimerId, - deadline: Duration, - }, - Done, -} - -impl Future for Sleep { - type Output = (); - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - if matches!(self.state, SleepState::Done) { - return Poll::Ready(()); - } - - if matches!(self.state, SleepState::Unregistered) { - let handle = try_current_handle().expect("sim::time::sleep polled outside sim runtime"); - let deadline = handle.now().saturating_add(self.duration); - let id = handle.next_timer_id(); - self.state = SleepState::Registered { handle, id, deadline }; - } - - let SleepState::Registered { handle, id, deadline } = &self.state else { - unreachable!("sleep state should be registered or done"); - }; - - if handle.now() >= *deadline { - let handle = handle.clone(); - let id = *id; - handle.cancel_timer(id); - self.state = SleepState::Done; - Poll::Ready(()) - } else { - handle.register_timer(*id, *deadline, cx.waker()); - Poll::Pending - } - } -} - -impl Drop for Sleep { - fn drop(&mut self) { - if let SleepState::Registered { handle, id, .. 
} = &self.state { - handle.cancel_timer(*id); - } - } -} - -fn wake_all(wakers: Vec) { - for waker in wakers { - waker.wake(); - } -} - -#[cfg(test)] -mod tests { - use std::{ - sync::{Arc, Mutex}, - time::Duration, - }; - - use crate::sim; - - #[test] - fn sleep_fast_forwards_virtual_time() { - let mut runtime = sim::Runtime::new(101).unwrap(); - - runtime.block_on(async { - assert_eq!(super::now(), Duration::ZERO); - super::sleep(Duration::from_millis(5)).await; - assert_eq!(super::now(), Duration::from_millis(5)); - }); - } - - #[test] - fn shorter_timer_wakes_first() { - let mut runtime = sim::Runtime::new(102).unwrap(); - let handle = runtime.handle(); - let order = Arc::new(Mutex::new(Vec::new())); - - runtime.block_on({ - let order = Arc::clone(&order); - async move { - let slow_order = Arc::clone(&order); - let slow = handle.spawn_on(sim::NodeId::MAIN, async move { - super::sleep(Duration::from_millis(10)).await; - slow_order.lock().expect("order poisoned").push(10); - }); - - let fast_order = Arc::clone(&order); - let fast = handle.spawn_on(sim::NodeId::MAIN, async move { - super::sleep(Duration::from_millis(3)).await; - fast_order.lock().expect("order poisoned").push(3); - }); - - fast.await; - slow.await; - } - }); - - assert_eq!(*order.lock().expect("order poisoned"), vec![3, 10]); - assert_eq!(runtime.elapsed(), Duration::from_millis(10)); - } - - #[test] - fn explicit_advance_moves_virtual_time() { - let mut runtime = sim::Runtime::new(103).unwrap(); - - runtime.block_on(async { - super::advance(Duration::from_millis(7)); - assert_eq!(super::now(), Duration::from_millis(7)); - }); - } - - #[test] - fn timeout_returns_future_output_before_deadline() { - let mut runtime = sim::Runtime::new(104).unwrap(); - - let output = runtime.block_on(async { - super::timeout(Duration::from_millis(10), async { - super::sleep(Duration::from_millis(3)).await; - 9 - }) - .await - }); - - assert_eq!(output, Ok(9)); - assert_eq!(runtime.elapsed(), 
Duration::from_millis(3)); - } - - #[test] - fn timeout_expires_at_virtual_deadline() { - let mut runtime = sim::Runtime::new(105).unwrap(); - - let output = runtime.block_on(async { - super::timeout(Duration::from_millis(4), async { - super::sleep(Duration::from_millis(20)).await; - 9 - }) - .await - }); - - assert_eq!(output.unwrap_err().duration(), Duration::from_millis(4)); - assert_eq!(runtime.elapsed(), Duration::from_millis(4)); - } -} diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs new file mode 100644 index 00000000000..6210675f638 --- /dev/null +++ b/crates/runtime/src/sim/time/mod.rs @@ -0,0 +1,297 @@ +//! Virtual time for the local simulation runtime. + +mod sleep; + +use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; +use core::{fmt, future::Future, task::Waker, time::Duration}; + +use futures_util::{select_biased, FutureExt}; +use sleep::wake_all; +use spin::Mutex; + +pub use sleep::Sleep; + +/// Shared virtual clock and timer registry for one simulation runtime. +/// +/// All cloned handles observe the same virtual `now`, pending timers, and +/// timer-id sequence. The executor uses this handle both for explicit +/// time-travel operations and for jumping directly to the next pending timer +/// when the runnable queue is empty. +#[derive(Clone, Debug)] +pub struct TimeHandle { + inner: Arc>, +} + +impl TimeHandle { + pub fn new() -> Self { + Self { + inner: Arc::new(Mutex::new(TimeState::default())), + } + } + + pub fn now(&self) -> Duration { + self.inner.lock().now + } + + /// Move virtual time forward by an explicit amount. + /// + /// This is the direct "advance the clock" operation used by tests and + /// higher-level simulation code. It updates `now`, removes any timers that + /// became due at the new instant, and wakes the corresponding tasks after + /// releasing the lock. 
+ pub fn advance(&self, duration: Duration) { + if duration.is_zero() { + return; + } + + let wakers = { + let mut state = self.inner.lock(); + state.now = state.now.saturating_add(duration); + state.take_due_wakers() + }; + wake_all(wakers); + } + + /// Jump virtual time to the earliest outstanding timer and wake it. + /// + /// The executor calls this when there are no runnable tasks left. Instead + /// of incrementing time in wall-clock steps, simulation time jumps + /// directly to the minimum timer deadline. Returns `false` if there are no + /// timers to wake. + pub fn wake_next_timer(&self) -> bool { + let wakers = { + let mut state = self.inner.lock(); + let Some(next_deadline) = state.timers.values().map(|timer| timer.deadline).min() else { + return false; + }; + if next_deadline > state.now { + state.now = next_deadline; + } + state.take_due_wakers() + }; + let woke = !wakers.is_empty(); + wake_all(wakers); + woke + } + + /// Register or refresh a timer entry for a sleeping future. + /// + /// Sleep futures keep a stable `TimerId` across polls. Re-registering with + /// the same id updates the stored waker without creating duplicate timers. + fn register_timer(&self, id: TimerId, deadline: Duration, waker: &Waker) { + let mut state = self.inner.lock(); + state.timers.insert( + id, + TimerEntry { + deadline, + waker: waker.clone(), + }, + ); + } + + /// Remove a timer entry if it is still present. + /// + /// Cancellation is best-effort because the timer may already have been + /// removed by a wakeup path before the caller reaches this point. + fn cancel_timer(&self, id: TimerId) { + self.inner.lock().timers.remove(&id); + } + + /// Allocate a fresh timer id for a new sleep future. + /// + /// Stable timer ids are what let a `Sleep` future re-register itself + /// across polls while still mapping back to a single timer entry. 
+ fn next_timer_id(&self) -> TimerId { + let mut state = self.inner.lock(); + let id = TimerId(state.next_timer_id); + state.next_timer_id = state.next_timer_id.saturating_add(1); + id + } + + /// Create a future that becomes ready after `duration` of virtual time. + /// + /// The returned future is lazy: it does not allocate a timer entry until + /// the first poll, when it can anchor its deadline to the current virtual + /// time. + pub fn sleep(&self, duration: Duration) -> Sleep { + Sleep::new(self.clone(), duration) + } + + /// Race a future against a virtual-time sleep. + /// + /// This is implemented as `future` versus `sleep(duration)` using a biased + /// select. If both become ready in the same simulated step, the main + /// future wins the tie so completion beats timeout deterministically. + pub async fn timeout(&self, duration: Duration, future: impl Future) -> Result { + let sleep = self.sleep(duration); + futures::pin_mut!(future); + futures::pin_mut!(sleep); + + select_biased! { + output = future.fuse() => Ok(output), + () = sleep.fuse() => Err(TimeoutElapsed { duration }), + } + } +} + +impl Default for TimeHandle { + fn default() -> Self { + Self::new() + } +} + +/// Mutable state behind a [`TimeHandle`]. +/// +/// `timers` is keyed by stable `TimerId` so a `Sleep` future can refresh its +/// waker across polls without accumulating duplicate entries. A `BTreeMap` is +/// used to keep due-timer iteration deterministic. +#[derive(Debug, Default)] +struct TimeState { + now: Duration, + next_timer_id: u64, + timers: BTreeMap, +} + +impl TimeState { + /// Remove every timer whose deadline is at or before the current virtual + /// time and return their wakers. 
+ fn take_due_wakers(&mut self) -> Vec { + let due = self + .timers + .iter() + .filter_map(|(id, timer)| (timer.deadline <= self.now).then_some(*id)) + .collect::>(); + due.into_iter() + .filter_map(|id| self.timers.remove(&id).map(|timer| timer.waker)) + .collect() + } +} + +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +struct TimerId(u64); + +/// Stored metadata for one pending timer. +#[derive(Debug)] +struct TimerEntry { + deadline: Duration, + waker: Waker, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct TimeoutElapsed { + duration: Duration, +} + +impl TimeoutElapsed { + pub fn duration(self) -> Duration { + self.duration + } +} + +impl fmt::Display for TimeoutElapsed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "simulated timeout elapsed after {:?}", self.duration) + } +} + +#[cfg(any(feature = "tokio", feature = "simulation-std"))] +impl std::error::Error for TimeoutElapsed {} + +#[cfg(test)] +mod tests { + use std::{sync::Arc, time::Duration}; + + use crate::sim; + use spin::Mutex; + + #[test] + fn sleep_fast_forwards_virtual_time() { + let mut runtime = sim::Runtime::new(101); + let handle = runtime.handle(); + + runtime.block_on(async move { + assert_eq!(handle.now(), Duration::ZERO); + handle.sleep(Duration::from_millis(5)).await; + assert_eq!(handle.now(), Duration::from_millis(5)); + }); + } + + #[test] + fn shorter_timer_wakes_first() { + let mut runtime = sim::Runtime::new(102); + let handle = runtime.handle(); + let order = Arc::new(Mutex::new(Vec::new())); + + runtime.block_on({ + let order = Arc::clone(&order); + async move { + let slow_order = Arc::clone(&order); + let slow_handle = handle.clone(); + let slow = handle.spawn_on(sim::NodeId::MAIN, async move { + slow_handle.sleep(Duration::from_millis(10)).await; + slow_order.lock().push(10); + }); + + let fast_order = Arc::clone(&order); + let fast_handle = handle.clone(); + let fast = handle.spawn_on(sim::NodeId::MAIN, async 
move { + fast_handle.sleep(Duration::from_millis(3)).await; + fast_order.lock().push(3); + }); + + fast.await; + slow.await; + } + }); + + assert_eq!(*order.lock(), vec![3, 10]); + assert_eq!(runtime.elapsed(), Duration::from_millis(10)); + } + + #[test] + fn explicit_advance_moves_virtual_time() { + let mut runtime = sim::Runtime::new(103); + let handle = runtime.handle(); + + runtime.block_on(async move { + handle.advance(Duration::from_millis(7)); + assert_eq!(handle.now(), Duration::from_millis(7)); + }); + } + + #[test] + fn timeout_returns_future_output_before_deadline() { + let mut runtime = sim::Runtime::new(104); + let handle = runtime.handle(); + + let output = runtime.block_on(async move { + handle + .timeout(Duration::from_millis(10), async { + handle.sleep(Duration::from_millis(3)).await; + 9 + }) + .await + }); + + assert_eq!(output, Ok(9)); + assert_eq!(runtime.elapsed(), Duration::from_millis(3)); + } + + #[test] + fn timeout_expires_at_virtual_deadline() { + let mut runtime = sim::Runtime::new(105); + let handle = runtime.handle(); + + let output = runtime.block_on(async move { + handle + .timeout(Duration::from_millis(4), async { + handle.sleep(Duration::from_millis(20)).await; + 9 + }) + .await + }); + + assert_eq!(output.unwrap_err().duration(), Duration::from_millis(4)); + assert_eq!(runtime.elapsed(), Duration::from_millis(4)); + } +} diff --git a/crates/runtime/src/sim/time/sleep.rs b/crates/runtime/src/sim/time/sleep.rs new file mode 100644 index 00000000000..538439018b7 --- /dev/null +++ b/crates/runtime/src/sim/time/sleep.rs @@ -0,0 +1,97 @@ +use alloc::vec::Vec; +use core::{ + future::Future, + pin::Pin, + task::{Context, Poll, Waker}, + time::Duration, +}; + +use super::{TimeHandle, TimerId}; + +/// Future returned by [`TimeHandle::sleep`]. +/// +/// The future stores a relative duration until first poll, then converts that +/// into an absolute deadline and a stable timer id. 
Subsequent polls either +/// complete immediately if virtual time has already reached the deadline or +/// refresh the registered waker and remain pending. +pub struct Sleep { + duration: Duration, + state: SleepState, +} + +impl Sleep { + pub(super) fn new(handle: TimeHandle, duration: Duration) -> Self { + Self { + duration, + state: SleepState::Unregistered { handle }, + } + } +} + +/// Internal state machine for [`Sleep`]. +enum SleepState { + Unregistered { + handle: TimeHandle, + }, + Registered { + handle: TimeHandle, + id: TimerId, + deadline: Duration, + }, + Done, +} + +impl Future for Sleep { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if matches!(self.state, SleepState::Done) { + return Poll::Ready(()); + } + + if let SleepState::Unregistered { handle } = &self.state { + let handle = handle.clone(); + let deadline = handle.now().saturating_add(self.duration); + let id = handle.next_timer_id(); + self.state = SleepState::Registered { handle, id, deadline }; + } + + let SleepState::Registered { handle, id, deadline } = &self.state else { + unreachable!("sleep state should be registered or done"); + }; + + if handle.now() >= *deadline { + let handle = handle.clone(); + let id = *id; + handle.cancel_timer(id); + self.state = SleepState::Done; + Poll::Ready(()) + } else { + handle.register_timer(*id, *deadline, cx.waker()); + Poll::Pending + } + } +} + +impl Drop for Sleep { + /// Remove a pending timer entry when the future is dropped early. + /// + /// This prevents stale wakers from remaining in the runtime after the + /// corresponding task has been cancelled or a timeout race has completed. + fn drop(&mut self) { + if let SleepState::Registered { handle, id, .. } = &self.state { + handle.cancel_timer(*id); + } + } +} + +/// Wake every task collected from a due-timer scan. 
+/// +/// Waking happens only after the time-state mutex has been released so resumed +/// tasks can inspect or mutate timer state without deadlocking on the same +/// lock. +pub(super) fn wake_all(wakers: Vec) { + for waker in wakers { + waker.wake(); + } +} diff --git a/crates/runtime/tests/sim_e2e.rs b/crates/runtime/tests/sim_e2e.rs new file mode 100644 index 00000000000..18d45b894a2 --- /dev/null +++ b/crates/runtime/tests/sim_e2e.rs @@ -0,0 +1,108 @@ +#![cfg(feature = "simulation")] + +use std::{sync::Arc, time::Duration}; + +use spacetimedb_runtime::sim::{buggify, Rng, Runtime}; +use spin::Mutex; + +#[test] +fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { + let mut runtime = Runtime::new(101); + let handle = runtime.handle(); + let node_a = runtime.create_node(); + let node_b = runtime.create_node(); + let events = Arc::new(Mutex::new(Vec::new())); + + runtime.pause(node_b); + + runtime.block_on({ + let events = Arc::clone(&events); + async move { + let a_handle = handle.clone(); + let a_events = Arc::clone(&events); + let a = handle.spawn_on(node_a, async move { + a_events.lock().push(("a_started", a_handle.now())); + a_handle.sleep(Duration::from_millis(3)).await; + a_events.lock().push(("a_finished", a_handle.now())); + }); + + let b_handle = handle.clone(); + let b_events = Arc::clone(&events); + let b = handle.spawn_on(node_b, async move { + b_events.lock().push(("b_started", b_handle.now())); + b_handle.sleep(Duration::from_millis(2)).await; + b_events.lock().push(("b_finished", b_handle.now())); + }); + + handle.sleep(Duration::from_millis(1)).await; + events.lock().push(("main_resumed_b", handle.now())); + handle.resume(node_b); + + a.await; + b.await; + } + }); + + let events = events.lock().clone(); + assert!(events.contains(&("a_started", Duration::ZERO))); + assert!(events.contains(&("main_resumed_b", Duration::from_millis(1)))); + assert!(events.contains(&("b_started", Duration::from_millis(1)))); + 
assert!(events.contains(&("a_finished", Duration::from_millis(3)))); + assert!(events.contains(&("b_finished", Duration::from_millis(3)))); + assert_eq!(runtime.elapsed(), Duration::from_millis(3)); +} + +#[test] +fn runtime_buggify_matches_standalone_rng_sequence() { + let seed = 77; + let runtime = Runtime::new(seed); + let expected = Rng::new(seed); + + buggify::enable(&runtime); + expected.enable_buggify(); + + let actual = (0..8) + .map(|_| buggify::should_inject_fault_with_prob(&runtime, 0.5)) + .collect::>(); + let expected = (0..8).map(|_| expected.buggify_with_prob(0.5)).collect::>(); + + assert_eq!(actual, expected); + assert!(buggify::is_enabled(&runtime)); + + buggify::disable(&runtime); + assert!(!buggify::is_enabled(&runtime)); + assert!(!buggify::should_inject_fault_with_prob(&runtime, 1.0)); +} + +#[test] +fn multi_node_timeout_uses_shared_virtual_clock() { + let mut runtime = Runtime::new(303); + let handle = runtime.handle(); + let slow_node = runtime.create_node(); + let fast_node = runtime.create_node(); + + let output = runtime.block_on(async move { + let slow_handle = handle.clone(); + let slow = handle.spawn_on(slow_node, async move { + slow_handle + .timeout(Duration::from_millis(4), async { + slow_handle.sleep(Duration::from_millis(10)).await; + "slow-finished" + }) + .await + }); + + let fast_handle = handle.clone(); + let fast = handle.spawn_on(fast_node, async move { + fast_handle.sleep(Duration::from_millis(2)).await; + ("fast-finished", fast_handle.now()) + }); + + (slow.await, fast.await) + }); + + let (slow, fast) = output; + assert_eq!(fast, ("fast-finished", Duration::from_millis(2))); + assert_eq!(slow.unwrap_err().duration(), Duration::from_millis(4)); + assert_eq!(runtime.elapsed(), Duration::from_millis(4)); +} From e4de2bdea1556b76bf53953bbb1dad5bf591aff7 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 11 May 2026 18:21:51 +0530 Subject: [PATCH 43/74] drop durability in reopen test helper --- 
crates/core/src/db/relational_db.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index ca0d1d3ccdb..4d87f3df918 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -2232,11 +2232,12 @@ pub mod tests_utils { drop(self.db); if let Some(DurableState { - durability: _, + durability, rt, replica_dir, }) = self.durable { + drop(durability); // Enter the runtime so that `Self::durable_internal` can spawn a `SnapshotWorker`. let _rt = rt.enter(); let (db, handle) = Self::durable_internal(&replica_dir, rt.handle().clone(), self.want_snapshot_repo)?; From 795a7049d398562ae1d6cdc4014f61f7dc309bb8 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 11 May 2026 19:51:53 +0530 Subject: [PATCH 44/74] drop durability in test --- crates/core/src/db/relational_db.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 4d87f3df918..e6ebf098d22 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -2024,7 +2024,7 @@ pub mod tests_utils { use super::*; use core::ops::Deref; - use durability::EmptyHistory; + use durability::{Durability, EmptyHistory}; use spacetimedb_datastore::locking_tx_datastore::MutTxId; use spacetimedb_datastore::locking_tx_datastore::TxId; use spacetimedb_fs_utils::compression::CompressType; @@ -2237,6 +2237,7 @@ pub mod tests_utils { replica_dir, }) = self.durable { + rt.block_on(durability.close()); drop(durability); // Enter the runtime so that `Self::durable_internal` can spawn a `SnapshotWorker`. 
let _rt = rt.enter(); From 425e728dd257989192a5e209a81b0440930881d2 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 11 May 2026 20:36:54 +0530 Subject: [PATCH 45/74] fix snapshot compressor --- Cargo.toml | 4 ++-- crates/core/src/db/relational_db.rs | 4 ++-- crates/core/src/db/snapshot.rs | 11 ++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4b88f753b9f..f4f74204ea3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -140,7 +140,7 @@ spacetimedb-pg = { path = "crates/pg", version = "=2.2.0" } spacetimedb-physical-plan = { path = "crates/physical-plan", version = "=2.2.0" } spacetimedb-primitives = { path = "crates/primitives", version = "=2.2.0" } spacetimedb-query = { path = "crates/query", version = "=2.2.0" } -spacetimedb-runtime = { path = "crates/runtime", version = "=2.2.0", default-features = false } +spacetimedb-runtime = { path = "crates/runtime", version = "=2.2.0" } spacetimedb-sats = { path = "crates/sats", version = "=2.2.0" } spacetimedb-schema = { path = "crates/schema", version = "=2.2.0" } spacetimedb-standalone = { path = "crates/standalone", version = "=2.2.0" } @@ -391,7 +391,7 @@ features = [ ] [workspace.lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)', 'cfg(simulation)'] } +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } [workspace.lints.clippy] # FIXME: we should work on this lint incrementally diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index fad350334d1..df25edb87c4 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -2187,7 +2187,7 @@ pub mod tests_utils { open_snapshot_repo(root.snapshots(), db_identity, replica_id).map(|repo| { SnapshotWorker::new_with_repository( repo, - snapshot::Compression::Disabled, + snapshot::Compression::Enabled, Runtime::tokio(rt.clone()), ) }) @@ -2317,7 +2317,7 @@ pub mod tests_utils { 
open_snapshot_repo(root.snapshots(), Identity::ZERO, 0).map(|repo| { SnapshotWorker::new_with_repository( repo, - snapshot::Compression::Disabled, + snapshot::Compression::Enabled, Runtime::tokio(rt.clone()), ) }) diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 042b257b608..4e3428b20f8 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -69,7 +69,7 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. - pub fn new(snapshot_repo: Arc, runtime: Runtime) -> Self { + pub fn new(snapshot_repo: Arc, compression: Compression, runtime: Runtime) -> Self { let database = snapshot_repo.database_identity(); let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); @@ -81,7 +81,12 @@ impl SnapshotWorker { snapshot_created: snapshot_created.clone(), metrics: SnapshotMetrics::new(database), runtime: runtime.clone(), - compression: None, + compression: compression.is_enabled().then(|| Compressor { + snapshot_repo: snapshot_repo.clone(), + metrics: CompressionMetrics::new(database), + stats: <_>::default(), + runtime: runtime.clone(), + }), }; runtime.spawn(actor.run()); @@ -342,7 +347,7 @@ impl CompressionMetrics { } struct Compressor { - snapshot_repo: Arc, + snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, runtime: Runtime, From 466481c869bc5a97f011120f92933ff69c79a4f1 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 12 May 2026 00:13:55 +0530 Subject: [PATCH 46/74] minor fixes --- crates/core/src/database_logger.rs | 6 ++-- crates/core/src/db/durability.rs | 2 -- crates/core/src/db/persistence.rs | 6 ++-- crates/core/src/db/relational_db.rs | 28 ++++++------------ crates/core/src/db/snapshot.rs | 44 
++++------------------------- crates/snapshot/tests/remote.rs | 6 ++-- 6 files changed, 22 insertions(+), 70 deletions(-) diff --git a/crates/core/src/database_logger.rs b/crates/core/src/database_logger.rs index f194cb60a48..0e202229dea 100644 --- a/crates/core/src/database_logger.rs +++ b/crates/core/src/database_logger.rs @@ -11,7 +11,7 @@ use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use tokio::io::{AsyncRead, BufReader, ReadBuf}; +use tokio::io::{AsyncRead, BufReader}; use tokio::sync::{broadcast, mpsc, oneshot}; use tokio_stream::wrappers::errors::BroadcastStreamRecvError; use tokio_stream::wrappers::BroadcastStream; @@ -592,7 +592,7 @@ fn seek_to(file: &mut File, buf: &mut [u8], num_lines: u32) -> io::Result<()> { Ok(()) } -fn read_exact_at(file: &File, buf: &mut [u8], offset: u64) -> io::Result<()> { +fn read_exact_at(file: &std::fs::File, buf: &mut [u8], offset: u64) -> io::Result<()> { #[cfg(unix)] { use std::os::unix::fs::FileExt; @@ -641,7 +641,7 @@ impl MaybeFile { } impl AsyncRead for MaybeFile { - fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll> { + fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut tokio::io::ReadBuf<'_>) -> Poll> { match self.project() { MaybeFileProj::File { inner } => inner.poll_read(cx, buf), MaybeFileProj::Empty => Poll::Ready(Ok(())), diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index 6d3b814a55f..965196c97ee 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -35,7 +35,6 @@ pub(super) fn spawn_close(durability: Arc, runtime: &Runtime, databa let label = format!("[{database_identity}]"); let runtime = runtime.clone(); runtime.clone().spawn(async move { - log::info!("starting spawn close"); match runtime.timeout(Duration::from_secs(10), durability.close()).await { Err(_elapsed) => { error!("{label} timeout waiting for durability shutdown"); @@ -44,7 +43,6 
@@ pub(super) fn spawn_close(durability: Arc, runtime: &Runtime, databa info!("{label} durability shut down at tx offset: {offset:?}"); } } - log::info!("closing spawn close"); }); } diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index cd69b2d82ad..9e84a4fb647 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -70,7 +70,7 @@ impl Persistence { } } - /// If snapshots are enabled, get the snapshot repository they are stored in. + /// If snapshots are enabled, get the [SnapshotRepo] they are stored in. pub fn snapshot_repo(&self) -> Option> { self.snapshots.as_ref().map(|worker| worker.snapshot_repo()) } @@ -157,9 +157,7 @@ impl PersistenceProvider for LocalPersistenceProvider { let snapshot_worker = asyncify(move || relational_db::open_snapshot_repo(snapshot_dir, database_identity, replica_id)) .await - .map(|repo| { - SnapshotWorker::new_with_repository(repo, snapshot::Compression::Enabled, Runtime::tokio_current()) - })?; + .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio_current()))?; let (durability, disk_size) = relational_db::local_durability(replica_dir, Some(&snapshot_worker)).await?; tokio::spawn(relational_db::snapshot_watching_commitlog_compressor( diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index df25edb87c4..f938efc71c1 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -241,14 +241,12 @@ impl RelationalDB { /// /// `None` may be passed to obtain an in-memory only database. /// - /// - snapshots + /// /// - `snapshot_repo` /// - /// Optional snapshot persistence and background snapshot execution, - /// carried through [`Persistence`]. + /// The [`SnapshotRepo`] which stores snapshots of this database. /// This is only meaningful if `history` and `durability` are also supplied. 
- /// If restoring from an existing database, the snapshot repository must - /// store views of the same sequence of TXes as the `history`. - /// + /// If restoring from an existing database, the `snapshot_repo` must + /// store views of the same sequence of TXes as the `history` /// - `metrics_recorder_queue` /// /// The send side of a queue for recording transaction metrics. @@ -489,7 +487,7 @@ impl RelationalDB { // Try to load the `ReconstructedSnapshot` at `snapshot_offset`. fn try_load_snapshot( database_identity: &Identity, - snapshot_repo: &(impl SnapshotRepo + ?Sized), + snapshot_repo: &DynSnapshotRepo, snapshot_offset: TxOffset, page_pool: &PagePool, ) -> Result> { @@ -623,7 +621,7 @@ impl RelationalDB { } } } - log::info!("[{database_identity}] DATABASE: no usable snapshot in store"); + log::info!("[{database_identity}] DATABASE: no usable snapshot in snapshot repo"); // If we didn't find a snapshot and the commitlog doesn't start at the // zero-th commit (e.g. due to archiving), there is no way to restore @@ -2185,11 +2183,7 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), db_identity, replica_id).map(|repo| { - SnapshotWorker::new_with_repository( - repo, - snapshot::Compression::Enabled, - Runtime::tokio(rt.clone()), - ) + SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio(rt.clone())) }) }) .transpose()?; @@ -2315,11 +2309,7 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), Identity::ZERO, 0).map(|repo| { - SnapshotWorker::new_with_repository( - repo, - snapshot::Compression::Enabled, - Runtime::tokio(rt.clone()), - ) + SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio(rt.clone())) }) }) .transpose()?; @@ -2363,7 +2353,7 @@ pub mod tests_utils { Arc::new(|_, _| i64::MAX) } - pub fn take_snapshot(&self, repo: &SnapshotRepository) -> Result, DBError> { + pub fn take_snapshot(&self, repo: 
&DynSnapshotRepo) -> Result, DBError> { Ok(self.inner.take_snapshot(repo)?) } } diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 4e3428b20f8..4a78100f1e0 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -14,7 +14,7 @@ use prometheus::{Histogram, IntGauge}; use spacetimedb_datastore::locking_tx_datastore::{committed_state::CommittedState, datastore::Locking}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo, SnapshotRepository}; +use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; use tokio::sync::watch; use crate::{runtime::Runtime, worker_metrics::WORKER_METRICS}; @@ -60,7 +60,7 @@ impl Compression { pub struct SnapshotWorker { snapshot_created: watch::Sender, request_snapshot: mpsc::UnboundedSender, - snapshot_repo: Arc, + snapshot_repository: Arc, } impl SnapshotWorker { @@ -93,7 +93,7 @@ impl SnapshotWorker { Self { snapshot_created, request_snapshot: request_tx, - snapshot_repo, + snapshot_repository: snapshot_repo, } } @@ -107,9 +107,9 @@ impl SnapshotWorker { .expect("snapshot worker panicked"); } - /// Get the snapshot repository this worker is operating on. + /// Get the snapshot repo this worker is operating on. pub fn snapshot_repo(&self) -> Arc { - self.snapshot_repo.clone() + self.snapshot_repository.clone() } /// Request a snapshot to be taken. 
@@ -143,40 +143,6 @@ impl SnapshotWorker { } } -impl SnapshotWorker { - pub fn new_with_repository( - snapshot_repository: Arc, - compression: Compression, - runtime: Runtime, - ) -> Self { - let database = snapshot_repository.database_identity(); - let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); - let (snapshot_created, _) = watch::channel(latest_snapshot); - let (request_tx, request_rx) = mpsc::unbounded(); - - let actor = SnapshotWorkerActor { - snapshot_requests: request_rx, - snapshot_repo: snapshot_repository.clone(), - snapshot_created: snapshot_created.clone(), - metrics: SnapshotMetrics::new(database), - runtime: runtime.clone(), - compression: compression.is_enabled().then(|| Compressor { - snapshot_repo: snapshot_repository.clone(), - metrics: CompressionMetrics::new(database), - stats: <_>::default(), - runtime: runtime.clone(), - }), - }; - runtime.spawn(actor.run()); - - Self { - snapshot_created, - request_snapshot: request_tx, - snapshot_repo: snapshot_repository, - } - } -} - struct SnapshotMetrics { snapshot_timing_total: Histogram, snapshot_timing_inner: Histogram, diff --git a/crates/snapshot/tests/remote.rs b/crates/snapshot/tests/remote.rs index 41097b33abd..81d67bc2ec5 100644 --- a/crates/snapshot/tests/remote.rs +++ b/crates/snapshot/tests/remote.rs @@ -10,7 +10,7 @@ use spacetimedb::{ snapshot::{self, SnapshotWorker}, }, error::DBError, - Identity, + runtime, Identity, }; use spacetimedb_datastore::execution_context::Workload; use spacetimedb_datastore::locking_tx_datastore::datastore::Locking; @@ -227,14 +227,14 @@ impl SourceSnapshot { async fn create_snapshot(repo: Arc) -> anyhow::Result { let start = Instant::now(); - let rt = tokio::runtime::Handle::current(); + let rt = runtime::Runtime::tokio_current(); // NOTE: `_db` needs to stay alive until the snapshot is taken, // because the snapshot worker holds only a weak reference. 
let (mut watch, _db) = spawn_blocking(|| { let persistence = Persistence { durability: Arc::new(NoDurability::default()), disk_size: Arc::new(|| Ok(<_>::default())), - snapshots: Some(SnapshotWorker::new(repo, snapshot::Compression::Disabled)), + snapshots: Some(SnapshotWorker::new(repo, snapshot::Compression::Disabled, rt.clone())), runtime: rt, }; let db = TestDB::open_db(EmptyHistory::new(), Some(persistence), None, 0)?; From 7d1e21d7f148179b09458ca54ef8a82a59052c47 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 12 May 2026 00:42:18 +0530 Subject: [PATCH 47/74] minor fix --- crates/core/src/db/durability.rs | 3 +- crates/core/src/db/persistence.rs | 11 ++- crates/core/src/db/relational_db.rs | 23 +++-- crates/core/src/db/snapshot.rs | 3 +- crates/core/src/lib.rs | 1 - crates/core/src/runtime.rs | 4 - .../subscription/module_subscription_actor.rs | 2 +- crates/standalone/src/subcommands/start.rs | 97 ++++++------------- 8 files changed, 57 insertions(+), 87 deletions(-) delete mode 100644 crates/core/src/runtime.rs diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index 965196c97ee..d712630a63a 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -10,7 +10,8 @@ use spacetimedb_durability::Transaction; use spacetimedb_lib::Identity; use spacetimedb_sats::ProductValue; -use crate::{db::persistence::Durability, runtime::Runtime}; +use crate::db::persistence::Durability; +use spacetimedb_runtime::Runtime; pub(super) fn request_durability( durability: &Durability, diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index 9e84a4fb647..dbd7e42c22c 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -6,7 +6,8 @@ use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; use spacetimedb_snapshot::DynSnapshotRepo; -use crate::{messages::control_db::Database, runtime::Runtime, 
util::asyncify}; +use crate::{messages::control_db::Database, util::asyncify}; +use spacetimedb_runtime::Runtime; use super::{ relational_db::{self, Txdata}, @@ -152,13 +153,15 @@ impl PersistenceProvider for LocalPersistenceProvider { async fn persistence(&self, database: &Database, replica_id: u64) -> anyhow::Result { let replica_dir = self.data_dir.replica(replica_id); let snapshot_dir = replica_dir.snapshots(); + let runtime = Runtime::tokio_current(); let database_identity = database.database_identity; let snapshot_worker = asyncify(move || relational_db::open_snapshot_repo(snapshot_dir, database_identity, replica_id)) .await - .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio_current()))?; - let (durability, disk_size) = relational_db::local_durability(replica_dir, Some(&snapshot_worker)).await?; + .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Enabled, runtime.clone()))?; + let (durability, disk_size) = + relational_db::local_durability(replica_dir, runtime.clone(), Some(&snapshot_worker)).await?; tokio::spawn(relational_db::snapshot_watching_commitlog_compressor( snapshot_worker.subscribe(), @@ -171,7 +174,7 @@ impl PersistenceProvider for LocalPersistenceProvider { durability, disk_size, snapshots: Some(snapshot_worker), - runtime: Runtime::tokio_current(), + runtime, }) } } diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index f938efc71c1..347d0509159 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -1,7 +1,6 @@ use crate::db::durability::{request_durability, spawn_close as spawn_durability_close}; use crate::db::MetricsRecorderQueue; use crate::error::{DBError, RestoreSnapshotError}; -use crate::runtime::Runtime; use crate::subscription::ExecutionCounters; use crate::util::asyncify; use crate::worker_metrics::WORKER_METRICS; @@ -45,6 +44,7 @@ use spacetimedb_lib::Identity; use spacetimedb_paths::server::SnapshotDirPath; use 
spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; use spacetimedb_primitives::*; +use spacetimedb_runtime::Runtime; use spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::raw_identifier::RawIdentifier; use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductType, ProductValue}; @@ -54,7 +54,7 @@ use spacetimedb_schema::schema::{ ColumnSchema, IndexSchema, RowLevelSecuritySchema, Schema, SequenceSchema, TableSchema, }; use spacetimedb_schema::table_name::TableName; -use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepo, SnapshotRepository}; +use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepository}; use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::page_pool::PagePool; use spacetimedb_table::table::{RowRef, TableScanIter}; @@ -143,8 +143,6 @@ impl Drop for RelationalDB { if let (Some(durability), Some(runtime)) = (self.durability.take(), self.durability_runtime.take()) { spawn_durability_close(durability, &runtime, self.database_identity); } - - log::info!("drop done"); } } @@ -241,12 +239,13 @@ impl RelationalDB { /// /// `None` may be passed to obtain an in-memory only database. /// - /// /// - `snapshot_repo` + /// - `snapshot_repo` /// /// The [`SnapshotRepo`] which stores snapshots of this database. /// This is only meaningful if `history` and `durability` are also supplied. /// If restoring from an existing database, the `snapshot_repo` must /// store views of the same sequence of TXes as the `history` + /// /// - `metrics_recorder_queue` /// /// The send side of a queue for recording transaction metrics. @@ -1681,9 +1680,9 @@ const COMMITLOG_COMPRESSION_FORCE_SEGMENT_BACKLOG: usize = 8; /// of the commitlog. 
pub async fn local_durability( replica_dir: ReplicaDir, + runtime: Runtime, snapshot_worker: Option<&SnapshotWorker>, ) -> Result<(LocalDurability, DiskSizeFn), DBError> { - let runtime = Runtime::tokio_current(); let on_new_segment = snapshot_worker.map(|snapshot_worker| { let snapshot_worker = snapshot_worker.clone(); Arc::new(move || { @@ -2188,14 +2187,16 @@ pub mod tests_utils { }) .transpose()?; - let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), snapshots.as_ref()))?; + let runtime = Runtime::tokio(rt.clone()); + let (local, disk_size_fn) = + rt.block_on(local_durability(root.clone(), runtime.clone(), snapshots.as_ref()))?; let history = local.as_history(); let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: Runtime::tokio(rt), + runtime, }; let (db, _) = RelationalDB::open( @@ -2313,13 +2314,15 @@ pub mod tests_utils { }) }) .transpose()?; - let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), snapshots.as_ref()))?; + let runtime = Runtime::tokio(rt.clone()); + let (local, disk_size_fn) = + rt.block_on(local_durability(root.clone(), runtime.clone(), snapshots.as_ref()))?; let history = local.as_history(); let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: Runtime::tokio(rt), + runtime, }; let db = Self::open_db(history, Some(persistence), None, 0)?; diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 4a78100f1e0..ca1749bd610 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -17,7 +17,8 @@ use spacetimedb_lib::Identity; use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; use tokio::sync::watch; -use crate::{runtime::Runtime, worker_metrics::WORKER_METRICS}; +use crate::worker_metrics::WORKER_METRICS; +use spacetimedb_runtime::Runtime; pub type SnapshotDatabaseState = Arc>; diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 
4a7246bcbd7..26b35230b1f 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -18,7 +18,6 @@ pub mod estimation; pub mod host; pub mod module_host_context; pub mod replica_context; -pub mod runtime; pub mod startup; pub mod subscription; pub mod util; diff --git a/crates/core/src/runtime.rs b/crates/core/src/runtime.rs deleted file mode 100644 index 4c55c71dccd..00000000000 --- a/crates/core/src/runtime.rs +++ /dev/null @@ -1,4 +0,0 @@ -//! Runtime boundary re-exported for core call sites. - -pub use spacetimedb_runtime::{current_handle_or_new_runtime, TokioHandle, TokioRuntime}; -pub use spacetimedb_runtime::{Runtime, RuntimeTimeout}; diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index 6ef9c3f055f..f9c9b13ae04 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2103,7 +2103,7 @@ mod tests { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), snapshots: None, - runtime: crate::runtime::Runtime::tokio(rt), + runtime: spacetimedb_runtime::Runtime::tokio(rt), }), None, 0, diff --git a/crates/standalone/src/subcommands/start.rs b/crates/standalone/src/subcommands/start.rs index bc8241938d2..50f6db19257 100644 --- a/crates/standalone/src/subcommands/start.rs +++ b/crates/standalone/src/subcommands/start.rs @@ -1,18 +1,12 @@ -#[cfg(not(simulation))] use netstat2::{get_sockets_info, AddressFamilyFlags, ProtocolFlags, ProtocolSocketInfo, TcpState}; -#[cfg(not(simulation))] use spacetimedb_client_api::routes::identity::IdentityRoutes; -#[cfg(not(simulation))] use spacetimedb_pg::pg_server; -#[cfg(not(simulation))] use std::io::{self, Write}; -#[cfg(not(simulation))] use std::net::IpAddr; use std::sync::Arc; use crate::{StandaloneEnv, StandaloneOptions}; use anyhow::Context; -#[cfg(not(simulation))] use axum::extract::DefaultBodyLimit; use clap::ArgAction::SetTrue; 
use clap::{Arg, ArgMatches}; @@ -21,14 +15,11 @@ use spacetimedb::db::{self, Storage}; use spacetimedb::startup::{self, TracingOptions}; use spacetimedb::util::jobs::JobCores; use spacetimedb::worker_metrics; -#[cfg(not(simulation))] use spacetimedb_client_api::routes::database::DatabaseRoutes; -#[cfg(not(simulation))] use spacetimedb_client_api::routes::router; use spacetimedb_client_api::routes::subscribe::WebSocketOptions; use spacetimedb_paths::cli::{PrivKeyPath, PubKeyPath}; use spacetimedb_paths::server::{ConfigToml, ServerDataDir}; -#[cfg(not(simulation))] use tokio::net::TcpListener; pub fn cli() -> clap::Command { @@ -120,7 +111,6 @@ impl ConfigFile { pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { let listen_addr = args.get_one::("listen_addr").unwrap(); let pg_port = args.get_one::("pg_port"); - #[cfg(not(simulation))] let non_interactive = args.get_flag("non_interactive"); let cert_dir = args.get_one::("jwt_key_dir"); let certs = Option::zip( @@ -208,26 +198,13 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { ); worker_metrics::spawn_page_pool_stats(listen_addr.clone(), ctx.page_pool().clone()); worker_metrics::spawn_bsatn_rlb_pool_stats(listen_addr.clone(), ctx.bsatn_rlb_pool().clone()); - #[cfg(simulation)] - { - let _ = (pg_port, ctx, listen_addr); - anyhow::bail!("standalone start server mode is not supported under simulation"); - } - - #[cfg(not(simulation))] let mut db_routes = DatabaseRoutes::default(); - #[cfg(not(simulation))] - { - db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); - db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); - db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); - } - #[cfg(not(simulation))] + db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); + db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); + db_routes.pre_publish = 
db_routes.pre_publish.layer(DefaultBodyLimit::disable()); let extra = axum::Router::new().nest("/health", spacetimedb_client_api::routes::health::router()); - #[cfg(not(simulation))] let service = router(&ctx, db_routes, IdentityRoutes::default(), extra).with_state(ctx.clone()); - #[cfg(not(simulation))] // Check if the requested port is available on both IPv4 and IPv6. // If not, offer to find an available port by incrementing (unless non-interactive). let listen_addr = if let Some((host, port_str)) = listen_addr.rsplit_once(':') { @@ -273,44 +250,40 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { listen_addr.to_string() }; - #[cfg(not(simulation))] - { - let tcp = TcpListener::bind(&listen_addr).await.context(format!( - "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" + let tcp = TcpListener::bind(&listen_addr).await.context(format!( + "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" + ))?; + socket2::SockRef::from(&tcp).set_nodelay(true)?; + log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); + + if let Some(pg_port) = pg_port { + let server_addr = listen_addr.split(':').next().unwrap(); + let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( + "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" ))?; - socket2::SockRef::from(&tcp).set_nodelay(true)?; - log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); - - if let Some(pg_port) = pg_port { - let server_addr = listen_addr.split(':').next().unwrap(); - let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( - "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is 
valid and not already in use" - ))?; - - let notify = Arc::new(tokio::sync::Notify::new()); - let shutdown_notify = notify.clone(); - tokio::select! { - _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, - _ = axum::serve(tcp, service).with_graceful_shutdown(async move { - shutdown_notify.notified().await; - }) => {}, - _ = tokio::signal::ctrl_c() => { - println!("Shutting down servers..."); - notify.notify_waiters(); // Notify all tasks - } + + let notify = Arc::new(tokio::sync::Notify::new()); + let shutdown_notify = notify.clone(); + tokio::select! { + _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, + _ = axum::serve(tcp, service).with_graceful_shutdown(async move { + shutdown_notify.notified().await; + }) => {}, + _ = tokio::signal::ctrl_c() => { + println!("Shutting down servers..."); + notify.notify_waiters(); // Notify all tasks } - } else { - log::warn!("PostgreSQL wire protocol server disabled"); - axum::serve(tcp, service) - .with_graceful_shutdown(async { - tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); - log::info!("Shutting down server..."); - }) - .await?; } + } else { + log::warn!("PostgreSQL wire protocol server disabled"); + axum::serve(tcp, service) + .with_graceful_shutdown(async { + tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); + log::info!("Shutting down server..."); + }) + .await?; } - #[cfg(not(simulation))] Ok(()) } @@ -329,7 +302,6 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { /// Note: There is a small race condition between this check and the actual bind - /// another process could grab the port in between. This is unlikely in practice /// and the actual bind will fail with a clear error if it happens. 
-#[cfg(not(simulation))] pub fn is_port_available(host: &str, port: u16) -> bool { let requested = match parse_host(host) { Some(r) => r, @@ -364,13 +336,11 @@ pub fn is_port_available(host: &str, port: u16) -> bool { } #[derive(Debug, Clone, Copy)] -#[cfg(not(simulation))] enum RequestedHost { Localhost, Ip(IpAddr), } -#[cfg(not(simulation))] fn parse_host(host: &str) -> Option { let host = host.trim(); @@ -384,7 +354,6 @@ fn parse_host(host: &str) -> Option { host.parse::().ok().map(RequestedHost::Ip) } -#[cfg(not(simulation))] fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { match requested { RequestedHost::Localhost => match listener_addr { @@ -455,7 +424,6 @@ fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { /// Find an available port starting from the requested port. /// Returns the first port that is available on both IPv4 and IPv6. -#[cfg(not(simulation))] fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Option { for offset in 0..max_attempts { let port = requested_port.saturating_add(offset); @@ -470,7 +438,6 @@ fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Op } /// Prompt the user with a yes/no question. Returns true if they answer yes. 
-#[cfg(not(simulation))] fn prompt_yes_no(question: &str) -> bool { print!("{} [y/N] ", question); io::stdout().flush().ok(); From 13d53a5cc48e365219b1ec01196d26c456b4ead9 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 12 May 2026 11:19:57 +0530 Subject: [PATCH 48/74] fix --- .../src/targets/relational_db_concurrent.rs | 113 ++++++++++++++---- 1 file changed, 91 insertions(+), 22 deletions(-) diff --git a/crates/dst/src/targets/relational_db_concurrent.rs b/crates/dst/src/targets/relational_db_concurrent.rs index 233e8dd5300..f0299470779 100644 --- a/crates/dst/src/targets/relational_db_concurrent.rs +++ b/crates/dst/src/targets/relational_db_concurrent.rs @@ -271,11 +271,11 @@ impl<'a> RoundMachine<'a> { fn multi_reader_snapshot(&mut self, round: &RoundPlan) -> Result<(), String> { self.begin_read(client(0))?; self.begin_read(client(1))?; - let rows_0 = self.full_scan(client(0))?; - let rows_1 = self.full_scan(client(1))?; - if rows_0 != rows_1 { + let snapshot_0 = self.full_scan(client(0))?; + let snapshot_1 = self.full_scan(client(1))?; + if snapshot_0 != snapshot_1 { return Err(format!( - "[ConcurrentRelationalDb] round={} readers observed different snapshots: left={rows_0:?} right={rows_1:?}", + "[ConcurrentRelationalDb] round={} readers observed different snapshots: left={snapshot_0:?} right={snapshot_1:?}", self.round )); } @@ -478,34 +478,28 @@ impl<'a> RoundMachine<'a> { } } - fn full_scan(&mut self, client: SessionId) -> Result, String> { + fn full_scan(&mut self, client: SessionId) -> Result { self.record_action(client, "full_scan"); - let rows = self.with_reader(client, |tx| collect_rows_in_tx(self.db, self.table_id, tx, "full scan"))?; + let summary = self.with_reader(client, |tx| scan_summary_in_tx(self.db, self.table_id, tx, "full scan"))?; self.events.push(RoundEvent::Read { round: self.round, client, kind: ReadKind::FullScan, - rows: rows.clone(), + summary, }); - Ok(rows) + Ok(summary) } - fn point_lookup(&mut self, client: SessionId, 
id: u64) -> Result, String> { + fn point_lookup(&mut self, client: SessionId, id: u64) -> Result { self.record_action(client, "point_lookup"); - let rows = self - .with_reader(client, |tx| { - collect_rows_in_tx(self.db, self.table_id, tx, "point lookup") - })? - .into_iter() - .filter(|row| row.id() == Some(id)) - .collect::>(); + let summary = self.with_reader(client, |tx| point_lookup_summary_in_tx(self.db, self.table_id, tx, id))?; self.events.push(RoundEvent::Read { round: self.round, client, kind: ReadKind::PointLookup { id }, - rows: rows.clone(), + summary, }); - Ok(rows) + Ok(summary) } fn with_writer( @@ -708,7 +702,7 @@ enum RoundEvent { round: u64, client: SessionId, kind: ReadKind, - rows: Vec, + summary: ReadSummary, }, } @@ -725,6 +719,20 @@ enum ReadKind { PointLookup { id: u64 }, } +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +struct ReadSummary { + row_count: usize, + checksum: u64, +} + +impl ReadSummary { + fn add_row(&mut self, row: &SimRow, label: &'static str) -> Result<(), String> { + self.row_count += 1; + self.checksum = self.checksum.wrapping_add(concurrent_row_checksum(row, label)?); + Ok(()) + } +} + #[derive(Clone, Debug)] enum ConcurrentMutation { Inserted(SimRow), @@ -808,13 +816,13 @@ impl StreamingProperties 1 || rows.iter().any(|row| row.id() != Some(*id)) { + if summary.row_count > 1 { return Err(format!( - "[ConcurrentRelationalDb] round={} invalid point lookup id={id}: {rows:?}", + "[ConcurrentRelationalDb] round={} invalid point lookup id={id}: {summary:?}", observation.round )); } @@ -862,6 +870,67 @@ fn collect_rows_in_tx( Ok(rows) } +fn scan_summary_in_tx( + db: &RelationalDB, + table_id: TableId, + tx: &RelTx, + label: &'static str, +) -> Result { + let mut summary = ReadSummary::default(); + for row_ref in db.iter(tx, table_id).map_err(|err| format!("{label} failed: {err}"))? 
{ + let row = SimRow::from_product_value(row_ref.to_product_value()); + summary.add_row(&row, label)?; + } + Ok(summary) +} + +fn point_lookup_summary_in_tx( + db: &RelationalDB, + table_id: TableId, + tx: &RelTx, + id: u64, +) -> Result { + let value = AlgebraicValue::U64(id); + let mut summary = ReadSummary::default(); + for row_ref in db + .iter_by_col_eq(tx, table_id, 0u16, &value) + .map_err(|err| format!("point lookup failed: {err}"))? + { + let row = SimRow::from_product_value(row_ref.to_product_value()); + if row.id() != Some(id) { + return Err(format!( + "[ConcurrentRelationalDb] point lookup id={id} returned different row: {row:?}" + )); + } + summary.add_row(&row, "point lookup")?; + } + Ok(summary) +} + +fn concurrent_row_checksum(row: &SimRow, label: &'static str) -> Result { + let id = row + .id() + .ok_or_else(|| format!("[ConcurrentRelationalDb] {label} row missing u64 id: {row:?}"))?; + let value = match row.values.get(1) { + Some(AlgebraicValue::U64(value)) => *value, + other => { + return Err(format!( + "[ConcurrentRelationalDb] {label} row has invalid value column: {other:?} in {row:?}" + )); + } + }; + + Ok(mix64(id) + .wrapping_add(mix64(value ^ 0xa076_1d64_78bd_642f)) + .wrapping_add(mix64(row.values.len() as u64))) +} + +fn mix64(mut value: u64) -> u64 { + value = (value ^ (value >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + value = (value ^ (value >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + value ^ (value >> 31) +} + fn expected_rows_from_events(events: &[RoundEvent]) -> Vec { let mut commits = events .iter() From a521298cbd17222603e3f72b4554cd629d3c8925 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 12 May 2026 13:08:06 +0530 Subject: [PATCH 49/74] fixes --- crates/core/Cargo.toml | 1 - crates/core/src/db/relational_db.rs | 4 +- crates/durability/src/imp/local.rs | 48 +++++--- crates/durability/src/imp/mod.rs | 5 - crates/runtime/src/lib.rs | 177 ++++++++++++++++++++++++++-- crates/runtime/src/sim/executor.rs | 105 
+++++++++++++++-- crates/runtime/src/sim/mod.rs | 2 +- 7 files changed, 299 insertions(+), 43 deletions(-) diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 2947eccac9d..6e7075536c2 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -134,7 +134,6 @@ tikv-jemalloc-ctl = {workspace = true} [target.'cfg(target_os = "linux")'.dependencies] nix = { workspace = true, features = ["sched"] } - [features] # Print a warning when doing an unindexed `iter_by_col_range` on a large table. unindexed_iter_by_col_range_warn = [] diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 347d0509159..1cdaa47142c 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -244,7 +244,7 @@ impl RelationalDB { /// The [`SnapshotRepo`] which stores snapshots of this database. /// This is only meaningful if `history` and `durability` are also supplied. /// If restoring from an existing database, the `snapshot_repo` must - /// store views of the same sequence of TXes as the `history` + /// store views of the same sequence of TXes as the `history`. 
/// /// - `metrics_recorder_queue` /// @@ -2182,7 +2182,7 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), db_identity, replica_id).map(|repo| { - SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio(rt.clone())) + SnapshotWorker::new(repo, snapshot::Compression::Disabled, Runtime::tokio(rt.clone())) }) }) .transpose()?; diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 6bced456ca7..ea7d78ae6cb 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -10,6 +10,7 @@ use std::{ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, trace, warn}; +use scopeguard::ScopeGuard; use spacetimedb_commitlog::{ error, payload::Txdata, @@ -18,9 +19,9 @@ use spacetimedb_commitlog::{ }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; -use spacetimedb_runtime::Runtime; +use spacetimedb_runtime::{JoinHandle, Runtime}; use thiserror::Error; -use tokio::sync::{oneshot, watch}; +use tokio::sync::watch; use tracing::{instrument, Span}; use crate::{Close, Durability, DurableOffset, History, PreparedTx, TxOffset}; @@ -106,9 +107,9 @@ where /// This is mainly for observability purposes, and can thus be updated with /// relaxed memory ordering. queue_depth: Arc, - /// Completion notification for the background actor. Contains `None` once + /// [`JoinHandle`] for the background actor task. Contains `None` once /// consumed by [`Durability::close`]. - actor_done: Mutex>>, + actor: Mutex>>, } /// Commitlog repo backed by [`Fs`] and protected by a [`LockedFile`]. 
@@ -225,17 +226,12 @@ where T: Encode + Send + Sync + 'static, R: Repo + Send + Sync + 'static, { - fn open_inner( - clog: Arc, R>>, - runtime: Runtime, - opts: Options, - ) -> Result { + fn open_inner(clog: Arc, R>>, runtime: Runtime, opts: Options) -> Result { let queue_capacity = opts.queue_capacity(); let (queue, txdata_rx) = async_channel::bounded(queue_capacity); let queue_depth = Arc::new(AtomicU64::new(0)); let (durable_tx, durable_rx) = watch::channel(clog.max_committed_offset()); - let (actor_done_tx, actor_done_rx) = oneshot::channel(); - runtime.spawn( + let actor = runtime.spawn( Actor { clog: clog.clone(), durable_offset: durable_tx, @@ -243,7 +239,7 @@ where batch_capacity: opts.batch_capacity, runtime: runtime.clone(), } - .run(txdata_rx, actor_done_tx), + .run(txdata_rx), ); Ok(Self { @@ -251,7 +247,7 @@ where durable_offset: durable_rx, queue, queue_depth, - actor_done: Mutex::new(Some(actor_done_rx)), + actor: Mutex::new(Some(actor)), }) } @@ -324,7 +320,7 @@ where R: Repo + Send + Sync + 'static, { #[instrument(name = "durability::local::actor", skip_all)] - async fn run(self, transactions_rx: async_channel::Receiver>>, done: oneshot::Sender<()>) { + async fn run(self, transactions_rx: async_channel::Receiver>>) { info!("starting durability actor"); let mut tx_buf = Vec::with_capacity(self.batch_capacity.get()); @@ -373,7 +369,6 @@ where } info!("exiting durability actor"); - let _ = done.send(()); } #[instrument(skip_all)] @@ -426,14 +421,29 @@ where info!("close local durability"); let durable_offset = self.durable_tx_offset(); - let maybe_actor_done = self.actor_done.lock().unwrap().take(); + let maybe_actor = self.actor.lock().unwrap().take(); + // Abort actor if shutdown future is dropped. 
+ let abort = scopeguard::guard( + maybe_actor.as_ref().map(|join_handle| join_handle.abort_handle()), + |maybe_abort_handle| { + if let Some(abort_handle) = maybe_abort_handle { + warn!("close future dropped, aborting durability actor"); + abort_handle.abort(); + } + }, + ); self.queue.close(); async move { - if let Some(actor_done) = maybe_actor_done - && actor_done.await.is_err() + if let Some(actor) = maybe_actor + && let Err(e) = actor.await { - warn!("durability actor completion signal dropped"); + // Will print "durability actor: task was cancelled" + // or "durability actor: task panicked [...]" + warn!("durability actor: {e}"); } + // Don't abort if the actor completed. + let _ = ScopeGuard::into_inner(abort); + durable_offset.last_seen() } .boxed() diff --git a/crates/durability/src/imp/mod.rs b/crates/durability/src/imp/mod.rs index 77f0998e6f8..3e00ae21ee1 100644 --- a/crates/durability/src/imp/mod.rs +++ b/crates/durability/src/imp/mod.rs @@ -56,9 +56,4 @@ mod testing { future::ready(*self.durable_offset.borrow()).boxed() } } - - #[cfg(test)] - mod tests { - use super::*; - } } diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 8777409c402..9fe4487b235 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -4,7 +4,14 @@ extern crate alloc; -use core::{fmt, future::Future, time::Duration}; +use core::{ + fmt, + future::Future, + marker::PhantomData, + pin::Pin, + task::{Context, Poll}, + time::Duration, +}; pub mod adapter; #[cfg(feature = "simulation")] @@ -21,6 +28,160 @@ pub enum Runtime { Simulation(sim::Handle), } +pub struct JoinHandle { + inner: JoinHandleInner, +} + +pub struct AbortHandle { + inner: AbortHandleInner, +} + +enum JoinHandleInner { + #[cfg(feature = "tokio")] + Tokio(Option>), + #[cfg(feature = "simulation")] + Simulation(Option>), + Detached(PhantomData), +} + +enum AbortHandleInner { + #[cfg(feature = "tokio")] + Tokio(tokio::task::AbortHandle), + #[cfg(feature = "simulation")] + 
Simulation(sim::AbortHandle), +} + +#[derive(Debug)] +pub struct JoinError { + inner: JoinErrorInner, +} + +#[derive(Debug)] +enum JoinErrorInner { + #[cfg(feature = "tokio")] + Tokio(tokio::task::JoinError), + #[cfg(feature = "simulation")] + Simulation(sim::JoinError), +} + +impl AbortHandle { + pub fn abort(&self) { + match &self.inner { + #[cfg(feature = "tokio")] + AbortHandleInner::Tokio(handle) => handle.abort(), + #[cfg(feature = "simulation")] + AbortHandleInner::Simulation(handle) => handle.abort(), + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + _ => unreachable!("runtime abort handle has no enabled backend"), + } + } +} + +impl fmt::Display for JoinError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = f; + match &self.inner { + #[cfg(feature = "tokio")] + JoinErrorInner::Tokio(err) => err.fmt(f), + #[cfg(feature = "simulation")] + JoinErrorInner::Simulation(err) => err.fmt(f), + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + _ => unreachable!("runtime join error has no enabled backend"), + } + } +} + +#[cfg(any(feature = "tokio", feature = "simulation-std"))] +impl std::error::Error for JoinError {} + +impl JoinHandle { + pub fn abort_handle(&self) -> AbortHandle { + match &self.inner { + #[cfg(feature = "tokio")] + JoinHandleInner::Tokio(Some(handle)) => AbortHandle { + inner: AbortHandleInner::Tokio(handle.abort_handle()), + }, + #[cfg(feature = "simulation")] + JoinHandleInner::Simulation(Some(handle)) => AbortHandle { + inner: AbortHandleInner::Simulation(handle.abort_handle()), + }, + #[cfg(feature = "tokio")] + JoinHandleInner::Tokio(None) => panic!("runtime join handle aborted after detach"), + #[cfg(feature = "simulation")] + JoinHandleInner::Simulation(None) => panic!("runtime join handle aborted after detach"), + JoinHandleInner::Detached(_) => panic!("runtime join handle aborted after completion"), + } + } + + pub fn 
detach(mut self) { + self.detach_inner(); + } + + fn detach_inner(&mut self) { + match &mut self.inner { + #[cfg(feature = "tokio")] + JoinHandleInner::Tokio(handle) => { + drop(handle.take()); + } + #[cfg(feature = "simulation")] + JoinHandleInner::Simulation(handle) => { + if let Some(handle) = handle.take() { + handle.detach(); + } + } + JoinHandleInner::Detached(_) => {} + } + self.inner = JoinHandleInner::Detached(PhantomData); + } +} + +impl Future for JoinHandle { + type Output = Result; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = cx; + match &mut self.inner { + #[cfg(feature = "tokio")] + JoinHandleInner::Tokio(Some(handle)) => match Pin::new(handle).poll(cx) { + Poll::Ready(Ok(output)) => { + self.inner = JoinHandleInner::Detached(PhantomData); + Poll::Ready(Ok(output)) + } + Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { + inner: JoinErrorInner::Tokio(err), + })), + Poll::Pending => Poll::Pending, + }, + #[cfg(feature = "simulation")] + JoinHandleInner::Simulation(Some(handle)) => match Pin::new(handle).poll_join(cx) { + Poll::Ready(Ok(output)) => { + self.inner = JoinHandleInner::Detached(PhantomData); + Poll::Ready(Ok(output)) + } + Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { + inner: JoinErrorInner::Simulation(err), + })), + Poll::Pending => Poll::Pending, + }, + #[cfg(feature = "tokio")] + JoinHandleInner::Tokio(None) => panic!("runtime join handle polled after detach"), + #[cfg(feature = "simulation")] + JoinHandleInner::Simulation(None) => panic!("runtime join handle polled after detach"), + JoinHandleInner::Detached(_) => panic!("runtime join handle polled after completion"), + } + } +} + +impl Drop for JoinHandle { + fn drop(&mut self) { + self.detach_inner(); + } +} + +impl Unpin for JoinHandle {} + #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct RuntimeTimeout; @@ -54,18 +215,18 @@ impl Runtime { 
adapter::sim_std::simulation_current() } - pub fn spawn(&self, future: impl Future + Send + 'static) { + pub fn spawn(&self, future: impl Future + Send + 'static) -> JoinHandle<()> { #[cfg(not(any(feature = "tokio", feature = "simulation")))] let _ = future; match self { #[cfg(feature = "tokio")] - Self::Tokio(handle) => { - handle.spawn(future); - } + Self::Tokio(handle) => JoinHandle { + inner: JoinHandleInner::Tokio(Some(handle.spawn(future))), + }, #[cfg(feature = "simulation")] - Self::Simulation(handle) => { - handle.spawn_on(sim::NodeId::MAIN, future).detach(); - } + Self::Simulation(handle) => JoinHandle { + inner: JoinHandleInner::Simulation(Some(handle.spawn_on(sim::NodeId::MAIN, future))), + }, #[cfg(not(any(feature = "tokio", feature = "simulation")))] _ => unreachable!("runtime dispatch has no enabled backend"), } diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index e0a28afc4ba..597bfcc4e09 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -6,7 +6,7 @@ use core::{ future::Future, pin::Pin, sync::atomic::{AtomicBool, AtomicU64, Ordering}, - task::{Context, Poll}, + task::{Context, Poll, Waker}, time::Duration, }; @@ -247,21 +247,104 @@ impl Handle { /// A spawned simulated task. pub struct JoinHandle { - task: async_task::Task, + task: async_task::Task, NodeId>, + abort: AbortHandle, } impl JoinHandle { + /// Return a handle that can cancel this task without consuming the join + /// handle. + pub fn abort_handle(&self) -> AbortHandle { + self.abort.clone() + } + /// Detach the task so it continues running without awaiting its output. 
pub fn detach(self) { self.task.detach(); } + + pub(crate) fn poll_join(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.task).poll(cx) + } } impl Future for JoinHandle { type Output = T; fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - Pin::new(&mut self.task).poll(cx) + match self.as_mut().poll_join(cx) { + Poll::Ready(Ok(output)) => Poll::Ready(output), + Poll::Ready(Err(err)) => panic!("sim task: {err}"), + Poll::Pending => Poll::Pending, + } + } +} + +#[derive(Clone)] +pub struct AbortHandle { + state: Arc, +} + +impl AbortHandle { + pub fn abort(&self) { + self.state.aborted.store(true, Ordering::Relaxed); + if let Some(waker) = self.state.waker.lock().take() { + waker.wake(); + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct JoinError; + +impl fmt::Display for JoinError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("task was cancelled") + } +} + +#[cfg(feature = "simulation-std")] +impl std::error::Error for JoinError {} + +struct AbortState { + aborted: AtomicBool, + waker: Mutex>, +} + +impl AbortState { + fn new() -> Self { + Self { + aborted: AtomicBool::new(false), + waker: Mutex::new(None), + } + } +} + +struct Abortable { + future: F, + abort: AbortHandle, +} + +impl Abortable { + fn new(future: F, abort: AbortHandle) -> Self { + Self { future, abort } + } +} + +impl Future for Abortable { + type Output = Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if self.abort.state.aborted.load(Ordering::Relaxed) { + return Poll::Ready(Err(JoinError)); + } + + self.abort.state.waker.lock().replace(cx.waker().clone()); + + // SAFETY: the wrapper never moves `future` after being pinned. Only the + // cancellation fields outside `future` are accessed normally. 
+ let mut future = unsafe { self.map_unchecked_mut(|this| &mut this.future) }; + future.as_mut().poll(cx).map(Ok) } } @@ -349,13 +432,17 @@ impl Executor { { self.node_state(node); + let abort = AbortHandle { + state: Arc::new(AbortState::new()), + }; + let abortable = Abortable::new(future, abort.clone()); let sender = self.sender.clone(); let (runnable, task) = async_task::Builder::new() .metadata(node) - .spawn(move |_| future, move |runnable| sender.send(runnable)); + .spawn(move |_| abortable, move |runnable| sender.send(runnable)); runnable.schedule(); - JoinHandle { task } + JoinHandle { task, abort } } /// Spawn a non-`Send` task on the single-threaded runtime. @@ -366,15 +453,19 @@ impl Executor { { self.node_state(node); + let abort = AbortHandle { + state: Arc::new(AbortState::new()), + }; + let abortable = Abortable::new(future, abort.clone()); let sender = self.sender.clone(); let (runnable, task) = unsafe { async_task::Builder::new() .metadata(node) - .spawn_unchecked(move |_| future, move |runnable| sender.send(runnable)) + .spawn_unchecked(move |_| abortable, move |runnable| sender.send(runnable)) }; runnable.schedule(); - JoinHandle { task } + JoinHandle { task, abort } } #[track_caller] diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs index 1b778f96d62..9575958f30d 100644 --- a/crates/runtime/src/sim/mod.rs +++ b/crates/runtime/src/sim/mod.rs @@ -11,6 +11,6 @@ mod rng; pub mod time; pub use config::RuntimeConfig; -pub use executor::{yield_now, Handle, JoinHandle, NodeId, Runtime}; +pub use executor::{yield_now, AbortHandle, Handle, JoinError, JoinHandle, NodeId, Runtime}; pub(crate) use rng::DeterminismLog; pub use rng::{GlobalRng, Rng}; From e59ac1237f3124a8b34f98e157a92044acba7730 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 12 May 2026 13:17:21 +0530 Subject: [PATCH 50/74] fix unneccessary diff --- crates/core/src/db/snapshot.rs | 6 +++--- crates/durability/src/imp/local.rs | 9 ++------- 2 files changed, 5 
insertions(+), 10 deletions(-) diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index ca1749bd610..8a83ef4318c 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -61,7 +61,7 @@ impl Compression { pub struct SnapshotWorker { snapshot_created: watch::Sender, request_snapshot: mpsc::UnboundedSender, - snapshot_repository: Arc, + snapshot_repo: Arc, } impl SnapshotWorker { @@ -94,7 +94,7 @@ impl SnapshotWorker { Self { snapshot_created, request_snapshot: request_tx, - snapshot_repository: snapshot_repo, + snapshot_repo, } } @@ -110,7 +110,7 @@ impl SnapshotWorker { /// Get the snapshot repo this worker is operating on. pub fn snapshot_repo(&self) -> Arc { - self.snapshot_repository.clone() + self.snapshot_repo.clone() } /// Request a snapshot to be taken. diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index ea7d78ae6cb..d56a0fa9f61 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -107,8 +107,8 @@ where /// This is mainly for observability purposes, and can thus be updated with /// relaxed memory ordering. queue_depth: Arc, - /// [`JoinHandle`] for the background actor task. Contains `None` once - /// consumed by [`Durability::close`]. + /// [JoinHandle] for the actor task. Contains `None` if already cancelled + /// (via [Durability::close]). actor: Mutex>>, } @@ -278,11 +278,6 @@ where self.queue_depth.load(Relaxed) } - /// Obtain an iterator over the [`Commit`]s in the underlying log. - pub fn commits_from(&self, offset: TxOffset) -> impl Iterator> + use { - self.clog.commits_from(offset).map_ok(Commit::from) - } - /// Get a list of segment offsets, sorted in ascending order. 
pub fn existing_segment_offsets(&self) -> io::Result> { self.clog.existing_segment_offsets() From d92ac0a98cac5b6bd0804c85140a5d6ef47739db Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 12 May 2026 14:55:16 +0530 Subject: [PATCH 51/74] README --- crates/runtime/README.md | 175 +++++++++++++++++++++++++++++++-------- 1 file changed, 142 insertions(+), 33 deletions(-) diff --git a/crates/runtime/README.md b/crates/runtime/README.md index 9be9172fb71..bca20bbc1ff 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -1,32 +1,36 @@ # spacetimedb-runtime `spacetimedb-runtime` is the runtime boundary shared by SpacetimeDB core code -and DST. The goal is not to emulate all of Tokio. We do not aim to support -`tokio::net`, `tokio::fs`, or arbitrary ecosystem compatibility here. The goal -is much narrower: provide the small amount of execution control that core -database code needs so that it can run under either a deterministic single- -threaded runtime or a hosted adapter. - -The crate is intentionally hybrid. Some parts of the process are naturally -Tokio-owned today, especially networking, subscriptions, and other integration- -heavy infrastructure. DST and selected core/database paths need a different -model: single-threaded, deterministic scheduling, explicit time, and a runtime -that can move toward `no_std + alloc`. This crate exists to support both -execution domains without forcing the whole process onto one scheduler. +and deterministic simulation testing (DST). + +The goal is deliberately smaller than "make our own Tokio." We do not try to +support `tokio::net`, `tokio::fs`, `tokio::io`, or arbitrary ecosystem runtime +compatibility here. The crate gives core database code the small amount of +execution control it needs so the same code path can run under either a +deterministic single-threaded simulator or a hosted adapter. + +That makes the runtime model intentionally hybrid. 
Networking, subscriptions, +client-facing services, and other integration-heavy infrastructure can stay on +Tokio. Core database paths that DST needs to explore should depend on explicit +runtime and storage abstractions instead. This follows the broader SpacetimeDB +direction: keep core state transitions deterministic and replayable, isolate +side effects behind small domain interfaces, and avoid letting host +infrastructure leak into database semantics. ## Architecture The top-level type in [src/lib.rs](./src/lib.rs) is `Runtime`. It is the small -facade that shared core code should depend on. `Runtime` is not the simulator -itself and it is not Tokio. It is a tagged handle with the backends that matter -to SpacetimeDB: +facade that shared core code should depend on when it needs to spawn work, +run blocking work, or apply runtime-owned timeouts. `Runtime` is not the +simulator itself and it is not Tokio. It is a tagged handle with the backends +that matter to SpacetimeDB: - `Runtime::Tokio(TokioHandle)` when the `tokio` feature is enabled - `Runtime::Simulation(sim::Handle)` when the `simulation` feature is enabled Code such as durability and snapshotting should accept or store `Runtime` and -use only the narrow operations exposed there: `spawn`, `spawn_blocking`, and -`timeout`. That keeps shared logic independent of the hosted runtime choice. +use only the narrow operations exposed there. That keeps shared logic +independent of the hosted runtime choice. Under that facade, this crate has two layers. @@ -36,11 +40,78 @@ this layer is `no_std + alloc`, explicit handles, explicit time, and no dependency on ambient host facilities. The second layer is the hosted adapter layer under [src/adapter](./src/adapter). -Today that includes a Tokio adapter and std-hosted simulation conveniences. The -Tokio adapter exists because some production and testing paths still need a real -process runtime. 
The std-hosted simulation helpers exist because determinism -testing, thread-local convenience APIs, and Unix hooks are useful in hosted -environments even though they are not part of the portable simulation core. +Today that includes a Tokio adapter and std-hosted simulation conveniences. +Those conveniences are useful for DST running as a normal process, but they are +adapters around the simulation core, not part of the portable core itself. + +## Runtime Contract + +The runtime contract is about control, not API compatibility. Code that wants +to be runnable under DST should route scheduling, time, randomness, and +runtime-owned background work through this crate or through a domain-specific +abstraction built on top of it. + +`Runtime` is the API for shared code. `sim::Runtime` is the deterministic engine +used by simulation tests. `adapter::*` is hosted glue for environments that have +Tokio, std, thread-local convenience APIs, or OS hooks available. + +Ambient runtime lookup should stay at the edge. Constructors such as +`Runtime::tokio_current()`, `Runtime::simulation_current()`, and +`current_handle_or_new_runtime()` are useful in bootstrap and adapter code, but +core database code should prefer explicit dependency injection. Passing the +runtime in makes tests replayable and makes the execution boundary visible in +review. + +`Runtime::timeout` is also runtime-owned. In the Tokio backend it is a real +Tokio timeout. In the simulation backend it is driven by virtual time. Shared +code should not assume wall-clock behavior unless it is intentionally running +only in a hosted adapter. + +## Determinism Boundary + +The simulator can only make behavior deterministic when the behavior is under +simulator control. 
In the simulation backend, the runtime controls: + +- task scheduling and runnable selection +- simulated nodes and pause/resume behavior +- virtual time and sleeps +- runtime RNG decisions +- buggify fault decisions tied to the runtime seed +- task lifecycle for futures spawned through the simulation handle + +These are reproducible from the runtime seed and the same sequence of simulated +inputs. If a test fails, DST should be able to report the target, scenario, +seed, interaction budget, and fault profile needed to reproduce the failure. + +The simulator does not make arbitrary host effects deterministic. Direct use of +OS threads, kernel blocking, wall-clock sleeps, real filesystem behavior, +process randomness, sockets, Tokio reactors, or external services is outside +the deterministic contract. Those effects might still be fine in production, +but DST needs them behind a smaller abstraction with a simulated +implementation. + +## How To Write Shared Code + +Prefer explicit dependencies. If shared code needs to spawn background work, +accept a `Runtime`. If it needs durable storage, accept a commitlog or snapshot +repository abstraction. If it needs time, accept a runtime or clock abstraction. +If it needs network behavior, accept a logical transport abstraction. Do not +pull in raw `tokio::fs`, `tokio::net`, `tokio::io`, or `tokio::time` from the +middle of a core database path and expect DST to control it later. + +The abstraction should match the domain, not the implementation detail. For +commitlog code, abstract over segment/repo operations. For snapshot code, +abstract over snapshot repository and object operations. For future networked +targets, abstract over logical messages and transport behavior. A byte stream +trait is only the right abstraction if byte stream behavior is what the test is +actually trying to model. + +For now, some core crates may still use `tokio::sync`. 
That is tolerated as a +short-term exception because those primitives are not tied to the Tokio reactor +in the same way as `tokio::net`, `tokio::fs`, or `tokio::time`. It should not +be read as permission to spread Tokio types through new DST-facing APIs. The +longer-term direction is to keep core database modules closer to explicit, +runtime-agnostic, and eventually `no_std + alloc`-friendly primitives. ## Feature Model @@ -56,11 +127,10 @@ The crate is organized around features that reflect that layering. - `tokio` Enables the Tokio-backed hosted adapter and remains part of the default feature set for now. -- `std` - Enables hosted-only functionality shared by the adapter layer. This means “simulation” is not shorthand for “all simulation tooling.” It is -the portable runtime core. Hosted extras live behind `simulation-std`. +the portable runtime core. Hosted extras live behind `simulation-std`, and +Tokio-specific integration lives behind `tokio`. ## Simulation Core @@ -79,6 +149,10 @@ time moves only under runtime control, not wall clock control. runtime uses this for scheduler choices, and test/workload code can use `Rng`/`GlobalRng` when it needs deterministic probabilistic decisions. +[src/sim/buggify.rs](./src/sim/buggify.rs) contains runtime-owned fault +injection helpers. Buggify is tied to a simulation runtime so fault decisions +come from the same seeded decision stream as the rest of the simulated run. + The public simulation surface is intentionally explicit: `sim::Runtime`, `sim::Handle`, `sim::NodeId`, `sim::JoinHandle`, `yield_now`, and the virtual time and RNG utilities. The portable direction is to make explicit-handle APIs @@ -99,18 +173,35 @@ itself. Examples include thread-local “current runtime” access, determinism replay helpers, and Unix hooks that prevent simulation from silently escaping onto real OS threads. +## DST Harness + +The DST crate has its own wrapper under `crates/dst/src/sim`. 
That wrapper keeps +DST-facing types such as `DstSeed` local to the DST crate while delegating +execution to `spacetimedb-runtime`. + +DST currently uses `simulation-std` because the harness itself runs as a normal +hosted process. That is where thread-local current-handle access, +determinism-check helpers, std random seeding, and pthread guards belong. The +portable simulation core should not grow `simulation-std` conditionals to make +those conveniences work. + +When adding a DST target, route target execution through the DST sim wrapper, +use `--max-interactions` for exact replay, and make all probabilistic choices +come from the run seed or the runtime RNG. Duration-based runs are useful for +local soak testing, but they are not an exact replay budget. + ## Current Scope This crate is not trying to make the whole of core `no_std` immediately. For -now, crates such as `relational_db`, `snapshot`, `commitlog`, and `datastore` -may still use `tokio::sync` internally. That is acceptable in the short term, -because those synchronization primitives are runtime-agnostic enough for DST and -the current runtime boundary effort is focused on execution control, not total -removal of Tokio-adjacent types from core. +now, crates such as relational DB, snapshot, commitlog, and datastore may still +contain std or Tokio-adjacent internals. The first goal is not a full portability +rewrite. The first goal is to stop execution, time, randomness, and durable +effects from being hidden behind ambient host APIs. -The longer-term goal is to reduce those dependencies where it materially helps -portability or determinism, but that work is explicitly out of scope for the -first phase of this crate architecture. +Longer term, the same boundary should make it easier to move selected core +database modules toward more constrained dependencies. 
That likely means more +small domain abstractions, fewer ambient singletons, fewer runtime-specific +types in core APIs, and less reliance on host behavior that DST cannot replay. ## Intended Usage @@ -125,3 +216,21 @@ on the same deterministic single-thread runtime in both DST and selected production paths, while networking, clients, subscriptions, and other hosted subsystems continue to live on Tokio. That is a deliberate design choice, not a temporary inconsistency. + +## Review Checklist + +Use this checklist when adding code that should be runnable under DST: + +- Does the code receive `Runtime` or a domain abstraction explicitly instead of + calling an ambient Tokio/simulation handle from the middle of core logic? +- Are sleeps, timeouts, background tasks, randomness, and fault decisions routed + through runtime-controlled APIs? +- Are filesystem, network, process, and thread effects hidden behind + domain-level abstractions with deterministic implementations for DST? +- Does the code avoid direct `tokio::fs`, `tokio::net`, `tokio::io`, + `tokio::time`, `std::thread`, wall-clock time, and process randomness on the + DST path? +- If `tokio::sync` is used, is it an internal short-term dependency rather than + a new public boundary for DST-facing core code? +- Can a failure be reproduced from target, scenario, seed, interaction budget, + and fault profile without relying on wall-clock duration or host scheduling? 
From d074cf01467228d04445dcdf88603d2e0606aa47 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 11:07:03 +0530 Subject: [PATCH 52/74] polishing --- Cargo.lock | 1 - crates/runtime/Cargo.toml | 4 +- crates/runtime/README.md | 155 ++++------- crates/runtime/src/adapter/mod.rs | 5 - crates/runtime/src/adapter/sim_std.rs | 361 -------------------------- crates/runtime/src/adapter/tokio.rs | 11 - crates/runtime/src/lib.rs | 151 +++++++---- crates/runtime/src/sim/config.rs | 16 -- crates/runtime/src/sim/executor.rs | 42 ++- crates/runtime/src/sim/mod.rs | 4 +- crates/runtime/src/sim/rng.rs | 42 +-- crates/runtime/src/sim/time/mod.rs | 2 +- crates/runtime/src/sim_std.rs | 327 +++++++++++++++++++++++ crates/runtime/tests/sim_e2e.rs | 93 +++++++ 14 files changed, 591 insertions(+), 623 deletions(-) delete mode 100644 crates/runtime/src/adapter/mod.rs delete mode 100644 crates/runtime/src/adapter/sim_std.rs delete mode 100644 crates/runtime/src/adapter/tokio.rs delete mode 100644 crates/runtime/src/sim/config.rs create mode 100644 crates/runtime/src/sim_std.rs diff --git a/Cargo.lock b/Cargo.lock index 5beda16a9d4..6a3c78946de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8486,7 +8486,6 @@ dependencies = [ "libc", "spin", "tokio", - "tracing", ] [[package]] diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index a86ee9d0fc4..0460432086b 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -16,10 +16,8 @@ tokio = { workspace = true, optional = true } async-task = { version = "4.4", optional = true } spin = { version = "0.9", default-features = false, features = ["mutex", "spin_mutex"], optional = true } libc = { version = "0.2", optional = true } -tracing = { workspace = true, optional = true } [features] default = ["tokio"] tokio = ["dep:tokio"] -simulation = ["dep:async-task", "dep:spin"] -simulation-std = ["simulation", "dep:libc", "dep:tracing"] +simulation = ["dep:async-task", "dep:spin", "dep:libc"] diff --git 
a/crates/runtime/README.md b/crates/runtime/README.md index 576122be42e..d0443dc3cd9 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -1,127 +1,60 @@ # spacetimedb-runtime -`spacetimedb-runtime` is the runtime boundary shared by SpacetimeDB core code -and DST. The goal is not to emulate all of Tokio. We do not aim to support -`tokio::net`, `tokio::fs`, or arbitrary ecosystem compatibility here. The goal -is much narrower: provide the small amount of execution control that core -database code needs so that it can run under either a deterministic single- -threaded runtime or a hosted adapter. +`spacetimedb-runtime` is runtime boundary that lets SpacetimeDB core code run +under deterministic simulation testing (DST). -The crate is intentionally hybrid. Some parts of the process are naturally -Tokio-owned today, especially networking, subscriptions, and other integration- -heavy infrastructure. DST and selected core/database paths need a different -model: single-threaded, deterministic scheduling, explicit time, and a runtime -that can move toward `no_std + alloc`. This crate exists to support both -execution domains without forcing the whole process onto one scheduler. +DST runs code inside a deterministic simulator that controls nondeterministic +inputs instead of letting them come directly from host environment. Given same +seed, simulator should produce same trace. When it finds a bug, seed should be +enough to reproduce that bug exactly. -## Architecture - -The top-level type in [src/lib.rs](./src/lib.rs) is `Runtime`. It is the small -facade that shared core code should depend on. `Runtime` is not the simulator -itself and it is not Tokio. 
It is a tagged handle with the backends that matter -to SpacetimeDB: - -- `Runtime::Tokio(TokioHandle)` when the `tokio` feature is enabled -- `Runtime::Simulation(sim::Handle)` when the `simulation` feature is enabled - -Code such as durability and snapshotting should accept or store `Runtime` and -use only the narrow operations exposed there: `spawn`, `spawn_blocking`, and -`timeout`. That keeps shared logic independent of the hosted runtime choice. - -Under that facade, this crate has two layers. - -The first layer is the simulation core under [src/sim](./src/sim). This is the -deterministic single-thread runtime used by DST. The long-term direction for -this layer is `no_std + alloc`, explicit handles, explicit time, and no -dependency on ambient host facilities. - -The second layer is the hosted adapter layer under [src/adapter](./src/adapter). -Today that includes a Tokio adapter and std-hosted simulation conveniences. The -Tokio adapter exists because some production and testing paths still need a real -process runtime. The std-hosted simulation helpers exist because determinism -testing, thread-local convenience APIs, and Unix hooks are useful in hosted -environments even though they are not part of the portable simulation core. - -## Feature Model +For this to work, code under test must not read clocks, randomness, +scheduling, I/O, or network behavior directly from outer environment. Those +effects need interfaces that production can implement with hosted services and +DST can replace with simulated ones. -The crate is organized around features that reflect that layering. +This crate provides the execution-control part of that boundary: spawning, +timeouts, virtual time, deterministic randomness, task scheduling, and fault +decisions. Storage, networking, and replication should be modeled through +higher-level abstractions. -- `simulation` - Enables the deterministic simulation runtime core. 
This is the part that is - intended to move toward `no_std + alloc`. -- `simulation-std` - Enables std-hosted conveniences layered on top of `simulation`, such as - thread-local current-handle access, determinism replay helpers, and host OS - integration hooks used by DST in a normal process. -- `tokio` - Enables the Tokio-backed hosted adapter and remains part of the default - feature set for now. -- `std` - Enables hosted-only functionality shared by the adapter layer. - -This means “simulation” is not shorthand for “all simulation tooling.” It is -the portable runtime core. Hosted extras live behind `simulation-std`. - -## Simulation Core - -The simulation core lives under [src/sim](./src/sim). - -[src/sim/executor.rs](./src/sim/executor.rs) contains the single-threaded -deterministic executor. It stores ready tasks as `async_task` runnables, uses a -deterministic RNG to choose the next runnable, supports pause/resume by logical -node, and treats “no runnable work and no future timer wakeups” as a hang. - -[src/sim/time.rs](./src/sim/time.rs) contains virtual time. It owns simulated -time state, timer registration, and timeout behavior. The key property is that -time moves only under runtime control, not wall clock control. - -[src/sim/rng.rs](./src/sim/rng.rs) contains deterministic randomness. The -runtime uses this for scheduler choices, and test/workload code can use -`DecisionSource` when it needs deterministic probabilistic decisions. - -The public simulation surface is intentionally explicit: `sim::Runtime`, -`sim::Handle`, `sim::NodeId`, `sim::JoinHandle`, `yield_now`, and the virtual -time and RNG utilities. The portable direction is to make explicit-handle APIs -the main interface, with host-style convenience APIs layered separately. +## Architecture -## Adapter Layer +[src/lib.rs](./src/lib.rs) exposes `Runtime`, small runtime handle shared code +carries. It has two variants: -The adapter layer lives under [src/adapter](./src/adapter). 
+- `Runtime::Tokio(TokioHandle)` for hosted execution. +- `Runtime::Simulation(sim::Handle)` for deterministic simulation. -[src/adapter/tokio.rs](./src/adapter/tokio.rs) is the Tokio facade. It defines -the hosted Tokio types used by the top-level runtime facade and provides -`current_handle_or_new_runtime()` for production code that may or may not -already be inside a Tokio runtime. +[src/sim](./src/sim) contains simulation core. It is single-threaded and aims +toward `no_std + alloc` over time. This includes: -Std-hosted simulation helpers stay outside the simulation core as well. These -helpers are valuable, but they are adapters around the core, not the core -itself. Examples include thread-local “current runtime” access, determinism -replay helpers, and Unix hooks that prevent simulation from silently escaping -onto real OS threads. +- `executor`: single-threaded task scheduler with deterministic runnable selection. +- `time`: virtual clock, sleeps, and timeouts. +- `rng`: seeded deterministic randomness for scheduler and workload decisions. +- `buggify`: seeded fault-injection decisions. +- `config`: runtime seed and simulator configuration. -## Current Scope +[src/sim_std.rs](./src/sim_std.rs) contains hosted glue around simulator: -This crate is not trying to make the whole of core `no_std` immediately. For -now, crates such as `relational_db`, `snapshot`, `commitlog`, and `datastore` -may still use `tokio::sync` internally. That is acceptable in the short term, -because those synchronization primitives are runtime-agnostic enough for DST and -the current runtime boundary effort is focused on execution control, not total -removal of Tokio-adjacent types from core. +- `block_on` installs thread-local simulation context for hosted tests. +- `check_determinism` replays same seeded workload twice and compares trace. 
+- libc randomness hooks route entropy requests to runtime RNG while simulation + is active, and warn before delegating to host OS outside simulation. +- Unix thread hooks reject accidental `std::thread::spawn` while simulation is + active. -The longer-term goal is to reduce those dependencies where it materially helps -portability or determinism, but that work is explicitly out of scope for the -first phase of this crate architecture. +Tokio integration is intentionally small and lives directly in +[src/lib.rs](./src/lib.rs). -## Intended Usage +The crate is intentionally hybrid because SpacetimeDB is hybrid. Host-facing +systems such as networking, subscriptions, wasm host glue, auth, process +metrics, and CLI code may continue to use hosted infrastructure. Deep-core and +DST-facing paths should instead depend on `Runtime` or narrower +domain-specific traits passed in by the caller. -Shared core/database code should depend on `Runtime`, not on raw Tokio handles -or simulator internals. DST should construct `sim::Runtime` directly and use it -to drive deterministic test execution. Hosted production/testing code that still -needs Tokio should use the Tokio adapter through `Runtime::tokio(...)`, -`Runtime::tokio_current()`, and `current_handle_or_new_runtime()`. +Feature flags: -The likely end state is still hybrid: core/database execution may eventually run -on the same deterministic single-thread runtime in both DST and selected -production paths, while networking, clients, subscriptions, and other hosted -subsystems continue to live on Tokio. That is a deliberate design choice, not a -temporary inconsistency. +- `tokio`: enables hosted runtime backend and remains in default feature set. +- `simulation`: enables deterministic simulation runtime and hosted `sim_std` + helpers. 
diff --git a/crates/runtime/src/adapter/mod.rs b/crates/runtime/src/adapter/mod.rs deleted file mode 100644 index a254877d883..00000000000 --- a/crates/runtime/src/adapter/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -#[cfg(feature = "tokio")] -pub mod tokio; - -#[cfg(feature = "simulation-std")] -pub mod sim_std; diff --git a/crates/runtime/src/adapter/sim_std.rs b/crates/runtime/src/adapter/sim_std.rs deleted file mode 100644 index 2eaa160adec..00000000000 --- a/crates/runtime/src/adapter/sim_std.rs +++ /dev/null @@ -1,361 +0,0 @@ -use alloc::boxed::Box; -use core::{ - cell::{Cell, RefCell}, - future::Future, - ptr, - time::Duration, -}; -use std::sync::OnceLock; - -use crate::sim; - -thread_local! { - static CURRENT_HANDLE: RefCell> = const { RefCell::new(None) }; - static CURRENT_RNG: RefCell> = const { RefCell::new(None) }; - static STD_RANDOM_SEED: Cell> = const { Cell::new(None) }; - static IN_SIMULATION: Cell = const { Cell::new(false) }; -} - -pub(crate) struct HandleContextGuard { - previous: Option, -} - -pub(crate) struct RngContextGuard { - previous: Option, -} - -pub(crate) struct SimulationThreadGuard { - previous: bool, -} - -pub fn simulation_current() -> crate::Runtime { - crate::Runtime::simulation(current_handle().expect("simulation runtime is not active on this thread")) -} - -pub fn block_on(runtime: &mut sim::Runtime, future: F) -> F::Output { - ensure_rng_hooks_linked(); - if !init_std_random_state(runtime.rng().seed()) { - tracing::warn!("failed to initialize std random state, std HashMap will not be deterministic"); - } - let _handle_context = enter_handle_context(runtime.handle()); - let _system_thread_context = enter_simulation_thread(); - let _rng_context = enter_rng_context(runtime.rng()); - runtime.block_on(future) -} - -pub fn current_handle() -> Option { - CURRENT_HANDLE.with(|handle| handle.borrow().clone()) -} - -pub fn advance_time(duration: Duration) { - current_handle() - .expect("simulation runtime is not active on this thread") - 
.advance(duration); -} - -pub fn now() -> Duration { - current_handle().map(|handle| handle.now()).unwrap_or_default() -} - -pub fn sleep(duration: Duration) -> sim::time::Sleep { - current_handle() - .expect("sim::time::sleep polled outside sim runtime") - .sleep(duration) -} - -pub async fn timeout(duration: Duration, future: impl Future) -> Result { - current_handle() - .expect("sim::time::timeout polled outside sim runtime") - .timeout(duration, future) - .await -} - -pub fn check_determinism(seed: u64, make_future: fn() -> F) -> F::Output -where - F: Future + 'static, - F::Output: Send + 'static, -{ - check_determinism_with(seed, make_future) -} - -pub fn check_determinism_with(seed: u64, make_future: M) -> F::Output -where - M: Fn() -> F + Clone + Send + 'static, - F: Future + 'static, - F::Output: Send + 'static, -{ - let first = make_future.clone(); - let log = std::thread::spawn(move || { - let mut runtime = sim::Runtime::new(seed); - runtime.enable_determinism_log(); - block_on(&mut runtime, first()); - runtime - .take_determinism_log() - .expect("determinism log should be enabled") - }) - .join() - .map_err(|payload| panic_with_seed(seed, payload)) - .unwrap(); - - std::thread::spawn(move || { - let mut runtime = sim::Runtime::new(seed); - runtime.enable_determinism_check(log); - let output = block_on(&mut runtime, make_future()); - runtime.finish_determinism_check().unwrap_or_else(|err| panic!("{err}")); - output - }) - .join() - .map_err(|payload| panic_with_seed(seed, payload)) - .unwrap() -} - -pub fn enable_buggify() { - current_handle() - .expect("simulation runtime is not active on this thread") - .enable_buggify(); -} - -pub fn disable_buggify() { - current_handle() - .expect("simulation runtime is not active on this thread") - .disable_buggify(); -} - -pub fn is_buggify_enabled() -> bool { - current_handle().is_some_and(|handle| handle.is_buggify_enabled()) -} - -pub fn buggify() -> bool { - current_handle() - .expect("simulation runtime is not 
active on this thread") - .buggify() -} - -pub fn buggify_with_prob(probability: f64) -> bool { - current_handle() - .expect("simulation runtime is not active on this thread") - .buggify_with_prob(probability) -} - -pub(crate) fn enter_handle_context(handle: sim::Handle) -> HandleContextGuard { - let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); - HandleContextGuard { previous } -} - -pub(crate) fn enter_simulation_thread() -> SimulationThreadGuard { - let previous = IN_SIMULATION.with(|state| state.replace(true)); - SimulationThreadGuard { previous } -} - -pub(crate) fn enter_rng_context(rng: sim::GlobalRng) -> RngContextGuard { - let previous = CURRENT_RNG.with(|current| current.replace(Some(rng))); - RngContextGuard { previous } -} - -fn in_simulation() -> bool { - IN_SIMULATION.with(Cell::get) -} - -fn init_std_random_state(seed: u64) -> bool { - STD_RANDOM_SEED.with(|slot| slot.set(Some(seed))); - let _ = std::collections::hash_map::RandomState::new(); - STD_RANDOM_SEED.with(|slot| slot.replace(None)).is_none() -} - -fn ensure_rng_hooks_linked() { - unsafe { - getentropy(ptr::null_mut(), 0); - } -} - -fn fill_from_seed(buf: *mut u8, buflen: usize, seed: u64) { - if buflen == 0 { - return; - } - let rng = sim::GlobalRng::new(seed); - let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; - rng.fill_bytes(buf); -} - -fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { - CURRENT_RNG.with(|current| { - let Some(rng) = current.borrow().clone() else { - return false; - }; - if buflen == 0 { - return true; - } - let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; - rng.fill_bytes(buf); - true - }) -} - -fn panic_with_seed(seed: u64, payload: Box) -> ! 
{ - eprintln!("note: run with --seed {seed} to reproduce this error"); - std::panic::resume_unwind(payload); -} - -impl Drop for HandleContextGuard { - fn drop(&mut self) { - CURRENT_HANDLE.with(|slot| { - *slot.borrow_mut() = self.previous.take(); - }); - } -} - -impl Drop for RngContextGuard { - fn drop(&mut self) { - CURRENT_RNG.with(|current| { - current.replace(self.previous.take()); - }); - } -} - -impl Drop for SimulationThreadGuard { - fn drop(&mut self) { - IN_SIMULATION.with(|state| { - state.set(self.previous); - }); - } -} - -#[cfg(unix)] -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int { - if in_simulation() { - eprintln!("attempt to spawn a system thread in simulation."); - eprintln!("note: use simulator tasks instead."); - return -1; - } - - type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int; - static PTHREAD_ATTR_INIT: OnceLock = OnceLock::new(); - let original = PTHREAD_ATTR_INIT.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original pthread_attr_init"); - std::mem::transmute(ptr) - }); - unsafe { original(attr) } -} - -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { - #[cfg(target_os = "macos")] - let _ = flags; - - if let Some(seed) = STD_RANDOM_SEED.with(|slot| slot.replace(None)) { - fill_from_seed(buf, buflen, seed); - return buflen as isize; - } - if fill_from_current_rng(buf, buflen) { - return buflen as isize; - } - - #[cfg(target_os = "linux")] - { - type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize; - static GETRANDOM: OnceLock = OnceLock::new(); - let original = GETRANDOM.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original getrandom"); 
- std::mem::transmute(ptr) - }); - unsafe { original(buf, buflen, flags) } - } - - #[cfg(target_os = "macos")] - { - type GetentropyFn = unsafe extern "C" fn(*mut u8, usize) -> libc::c_int; - static GETENTROPY: OnceLock = OnceLock::new(); - let original = GETENTROPY.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"getentropy".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original getentropy"); - std::mem::transmute(ptr) - }); - match unsafe { original(buf, buflen) } { - -1 => -1, - 0 => buflen as isize, - _ => unreachable!("unexpected getentropy return value"), - } - } - - #[cfg(not(any(target_os = "linux", target_os = "macos")))] - { - let _ = (buf, buflen, flags); - compile_error!("unsupported OS for DST getrandom override"); - } -} - -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { - if buflen > 256 { - return -1; - } - match unsafe { getrandom(buf, buflen, 0) } { - -1 => -1, - _ => 0, - } -} - -#[cfg(target_os = "macos")] -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn CCRandomGenerateBytes(bytes: *mut u8, count: usize) -> i32 { - match unsafe { getrandom(bytes, count, 0) } { - -1 => -1, - _ => 0, - } -} - -#[cfg(test)] -mod tests { - use crate::sim; - - #[test] - #[cfg(unix)] - fn runtime_forbids_system_thread_spawn() { - let mut runtime = sim::Runtime::new(200); - runtime.block_on(async { - let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); - assert!(result.is_err()); - }); - } - - #[test] - fn getentropy_uses_current_sim_rng() { - let rng = sim::GlobalRng::new(20); - let _guard = enter_rng_context(rng.clone()); - - let mut actual = [0u8; 24]; - unsafe { - assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); - } - - let expected_rng = sim::GlobalRng::new(20); - let mut expected = [0u8; 24]; - expected_rng.fill_bytes(&mut expected); - assert_eq!(actual, expected); - } - - #[test] - fn 
std_hashmap_order_is_seeded_for_runtime_thread() { - fn order_for(seed: u64) -> Vec<(u64, u64)> { - std::thread::spawn(move || { - let _ = init_std_random_state(seed); - (0..12) - .map(|idx| (idx, idx)) - .collect::>() - .into_iter() - .collect() - }) - .join() - .unwrap() - } - - assert_eq!(order_for(30), order_for(30)); - } -} diff --git a/crates/runtime/src/adapter/tokio.rs b/crates/runtime/src/adapter/tokio.rs deleted file mode 100644 index 5d605bba39a..00000000000 --- a/crates/runtime/src/adapter/tokio.rs +++ /dev/null @@ -1,11 +0,0 @@ -pub type TokioHandle = tokio::runtime::Handle; -pub type TokioRuntime = tokio::runtime::Runtime; - -pub fn current_handle_or_new_runtime() -> std::io::Result<(TokioHandle, Option)> { - if let Ok(handle) = TokioHandle::try_current() { - return Ok((handle, None)); - } - - let runtime = TokioRuntime::new()?; - Ok((runtime.handle().clone(), Some(runtime))) -} diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 9fe4487b235..90345998df5 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,4 +1,4 @@ -#![cfg_attr(not(any(feature = "tokio", feature = "simulation-std")), no_std)] +#![cfg_attr(not(any(feature = "tokio", feature = "simulation")), no_std)] //! Runtime and deterministic simulation utilities shared by core and DST. 
@@ -13,12 +13,25 @@ use core::{ time::Duration, }; -pub mod adapter; #[cfg(feature = "simulation")] pub mod sim; +#[cfg(feature = "simulation")] +pub mod sim_std; #[cfg(feature = "tokio")] -pub use adapter::tokio::{current_handle_or_new_runtime, TokioHandle, TokioRuntime}; +pub type TokioHandle = tokio::runtime::Handle; +#[cfg(feature = "tokio")] +pub type TokioRuntime = tokio::runtime::Runtime; + +#[cfg(feature = "tokio")] +pub fn current_handle_or_new_runtime() -> std::io::Result<(TokioHandle, Option)> { + if let Ok(handle) = TokioHandle::try_current() { + return Ok((handle, None)); + } + + let runtime = TokioRuntime::new()?; + Ok((runtime.handle().clone(), Some(runtime))) +} #[derive(Clone)] pub enum Runtime { @@ -77,99 +90,121 @@ impl AbortHandle { } } -impl fmt::Display for JoinError { +impl JoinErrorInner { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - #[cfg(not(any(feature = "tokio", feature = "simulation")))] - let _ = f; - match &self.inner { + match self { #[cfg(feature = "tokio")] - JoinErrorInner::Tokio(err) => err.fmt(f), + Self::Tokio(err) => fmt::Display::fmt(err, f), #[cfg(feature = "simulation")] - JoinErrorInner::Simulation(err) => err.fmt(f), - #[cfg(not(any(feature = "tokio", feature = "simulation")))] - _ => unreachable!("runtime join error has no enabled backend"), + Self::Simulation(err) => fmt::Display::fmt(err, f), } } } -#[cfg(any(feature = "tokio", feature = "simulation-std"))] +impl fmt::Display for JoinError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = f; + #[cfg(any(feature = "tokio", feature = "simulation"))] + return self.inner.fmt(f); + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + unreachable!("runtime join error has no enabled backend") + } +} + +#[cfg(any(feature = "tokio", feature = "simulation"))] impl std::error::Error for JoinError {} -impl JoinHandle { - pub fn abort_handle(&self) -> AbortHandle { - match 
&self.inner { +impl JoinHandleInner { + fn abort_handle(&self) -> AbortHandle { + match self { #[cfg(feature = "tokio")] - JoinHandleInner::Tokio(Some(handle)) => AbortHandle { + Self::Tokio(Some(handle)) => AbortHandle { inner: AbortHandleInner::Tokio(handle.abort_handle()), }, #[cfg(feature = "simulation")] - JoinHandleInner::Simulation(Some(handle)) => AbortHandle { + Self::Simulation(Some(handle)) => AbortHandle { inner: AbortHandleInner::Simulation(handle.abort_handle()), }, #[cfg(feature = "tokio")] - JoinHandleInner::Tokio(None) => panic!("runtime join handle aborted after detach"), + Self::Tokio(None) => panic!("runtime join handle aborted after detach"), #[cfg(feature = "simulation")] - JoinHandleInner::Simulation(None) => panic!("runtime join handle aborted after detach"), - JoinHandleInner::Detached(_) => panic!("runtime join handle aborted after completion"), + Self::Simulation(None) => panic!("runtime join handle aborted after detach"), + Self::Detached(_) => panic!("runtime join handle aborted after completion"), } } - pub fn detach(mut self) { - self.detach_inner(); - } - - fn detach_inner(&mut self) { - match &mut self.inner { + fn detach(&mut self) { + match self { #[cfg(feature = "tokio")] - JoinHandleInner::Tokio(handle) => { + Self::Tokio(handle) => { drop(handle.take()); } #[cfg(feature = "simulation")] - JoinHandleInner::Simulation(handle) => { + Self::Simulation(handle) => { if let Some(handle) = handle.take() { handle.detach(); } } - JoinHandleInner::Detached(_) => {} + Self::Detached(_) => {} } - self.inner = JoinHandleInner::Detached(PhantomData); } -} -impl Future for JoinHandle { - type Output = Result; - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - #[cfg(not(any(feature = "tokio", feature = "simulation")))] - let _ = cx; - match &mut self.inner { + fn poll_result(&mut self, cx: &mut Context<'_>) -> Poll> { + match self { #[cfg(feature = "tokio")] - JoinHandleInner::Tokio(Some(handle)) => match 
Pin::new(handle).poll(cx) { - Poll::Ready(Ok(output)) => { - self.inner = JoinHandleInner::Detached(PhantomData); - Poll::Ready(Ok(output)) - } + Self::Tokio(Some(handle)) => match Pin::new(handle).poll(cx) { + Poll::Ready(Ok(output)) => Poll::Ready(Ok(output)), Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { inner: JoinErrorInner::Tokio(err), })), Poll::Pending => Poll::Pending, }, #[cfg(feature = "simulation")] - JoinHandleInner::Simulation(Some(handle)) => match Pin::new(handle).poll_join(cx) { - Poll::Ready(Ok(output)) => { - self.inner = JoinHandleInner::Detached(PhantomData); - Poll::Ready(Ok(output)) - } + Self::Simulation(Some(handle)) => match Pin::new(handle).poll_join(cx) { + Poll::Ready(Ok(output)) => Poll::Ready(Ok(output)), Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { inner: JoinErrorInner::Simulation(err), })), Poll::Pending => Poll::Pending, }, #[cfg(feature = "tokio")] - JoinHandleInner::Tokio(None) => panic!("runtime join handle polled after detach"), + Self::Tokio(None) => panic!("runtime join handle polled after detach"), #[cfg(feature = "simulation")] - JoinHandleInner::Simulation(None) => panic!("runtime join handle polled after detach"), - JoinHandleInner::Detached(_) => panic!("runtime join handle polled after completion"), + Self::Simulation(None) => panic!("runtime join handle polled after detach"), + Self::Detached(_) => panic!("runtime join handle polled after completion"), + } + } +} + +impl JoinHandle { + pub fn abort_handle(&self) -> AbortHandle { + self.inner.abort_handle() + } + + pub fn detach(mut self) { + self.detach_inner(); + } + + fn detach_inner(&mut self) { + self.inner.detach(); + self.inner = JoinHandleInner::Detached(PhantomData); + } +} + +impl Future for JoinHandle { + type Output = Result; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = cx; + match self.inner.poll_result(cx) { + Poll::Ready(Ok(output)) => { + 
self.inner = JoinHandleInner::Detached(PhantomData); + Poll::Ready(Ok(output)) + } + Poll::Ready(Err(err)) => Poll::Ready(Err(err)), + Poll::Pending => Poll::Pending, } } } @@ -191,30 +226,32 @@ impl fmt::Display for RuntimeTimeout { } } -#[cfg(any(feature = "tokio", feature = "simulation-std"))] +#[cfg(any(feature = "tokio", feature = "simulation"))] impl std::error::Error for RuntimeTimeout {} +#[cfg(feature = "tokio")] impl Runtime { - #[cfg(feature = "tokio")] pub fn tokio(handle: TokioHandle) -> Self { Self::Tokio(handle) } - #[cfg(feature = "tokio")] pub fn tokio_current() -> Self { Self::tokio(TokioHandle::current()) } +} - #[cfg(feature = "simulation")] +#[cfg(feature = "simulation")] +impl Runtime { pub fn simulation(handle: sim::Handle) -> Self { Self::Simulation(handle) } - #[cfg(feature = "simulation-std")] pub fn simulation_current() -> Self { - adapter::sim_std::simulation_current() + sim_std::simulation_current() } +} +impl Runtime { pub fn spawn(&self, future: impl Future + Send + 'static) -> JoinHandle<()> { #[cfg(not(any(feature = "tokio", feature = "simulation")))] let _ = future; diff --git a/crates/runtime/src/sim/config.rs b/crates/runtime/src/sim/config.rs deleted file mode 100644 index 92ab8d0fdbc..00000000000 --- a/crates/runtime/src/sim/config.rs +++ /dev/null @@ -1,16 +0,0 @@ -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct RuntimeConfig { - pub seed: u64, -} - -impl RuntimeConfig { - pub const fn new(seed: u64) -> Self { - Self { seed } - } -} - -impl Default for RuntimeConfig { - fn default() -> Self { - Self::new(0) - } -} diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index 597bfcc4e09..04abae27020 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -13,10 +13,27 @@ use core::{ use futures_util::FutureExt; use spin::Mutex; -use crate::sim::{time::TimeHandle, Rng, RuntimeConfig}; +use crate::sim::{time::TimeHandle, Rng}; type Runnable = 
async_task::Runnable; +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct RuntimeConfig { + pub seed: u64, +} + +impl RuntimeConfig { + pub const fn new(seed: u64) -> Self { + Self { seed } + } +} + +impl Default for RuntimeConfig { + fn default() -> Self { + Self::new(0) + } +} + /// A unique identifier for a simulated node. #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct NodeId(u64); @@ -303,7 +320,7 @@ impl fmt::Display for JoinError { } } -#[cfg(feature = "simulation-std")] +#[cfg(feature = "simulation")] impl std::error::Error for JoinError {} struct AbortState { @@ -675,7 +692,7 @@ mod tests { let actual = (0..8).map(|_| handle.buggify_with_prob(0.5)).collect::>(); let expected = { - let mut rng = Rng::new(77); + let rng = Rng::new(77); rng.enable_buggify(); (0..8).map(|_| rng.buggify_with_prob(0.5)).collect::>() }; @@ -696,15 +713,14 @@ mod tests { assert!(!runtime.is_buggify_enabled()); } - #[cfg(feature = "simulation-std")] + #[cfg(feature = "simulation")] #[test] fn current_handle_can_spawn_local_task_inside_runtime() { - assert!(crate::adapter::sim_std::current_handle().is_none()); + assert!(crate::sim_std::current_handle().is_none()); let mut runtime = Runtime::new(5); - let value = crate::adapter::sim_std::block_on(&mut runtime, async { - let handle = - crate::adapter::sim_std::current_handle().expect("sim handle should be present inside block_on"); + let value = crate::sim_std::block_on(&mut runtime, async { + let handle = crate::sim_std::current_handle().expect("sim handle should be present inside block_on"); let node = handle.create_node(); let captured = std::rc::Rc::new(17); handle @@ -716,16 +732,16 @@ mod tests { }); assert_eq!(value, 17); - assert!(crate::adapter::sim_std::current_handle().is_none()); + assert!(crate::sim_std::current_handle().is_none()); } - #[cfg(feature = "simulation-std")] + #[cfg(feature = "simulation")] #[test] fn check_determinism_runs_future_twice() { static CALLS: AtomicUsize = 
AtomicUsize::new(0); CALLS.store(0, Ordering::SeqCst); - let value = crate::adapter::sim_std::check_determinism(3, || async { + let value = crate::sim_std::check_determinism(3, || async { CALLS.fetch_add(1, Ordering::SeqCst); yield_now().await; 13 @@ -735,14 +751,14 @@ mod tests { assert_eq!(CALLS.load(Ordering::SeqCst), 2); } - #[cfg(feature = "simulation-std")] + #[cfg(feature = "simulation")] #[test] #[should_panic(expected = "non-determinism detected")] fn check_determinism_rejects_different_scheduler_sequence() { static FIRST_RUN: AtomicBool = AtomicBool::new(true); FIRST_RUN.store(true, Ordering::SeqCst); - crate::adapter::sim_std::check_determinism(4, || async { + crate::sim_std::check_determinism(4, || async { if FIRST_RUN.swap(false, Ordering::SeqCst) { yield_now().await; } diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs index 9575958f30d..4a87c3ef7ac 100644 --- a/crates/runtime/src/sim/mod.rs +++ b/crates/runtime/src/sim/mod.rs @@ -5,12 +5,10 @@ //! deterministic RNG instead of being driven by a package-level async runtime. pub mod buggify; -mod config; mod executor; mod rng; pub mod time; -pub use config::RuntimeConfig; -pub use executor::{yield_now, AbortHandle, Handle, JoinError, JoinHandle, NodeId, Runtime}; +pub use executor::{yield_now, AbortHandle, Handle, JoinError, JoinHandle, NodeId, Runtime, RuntimeConfig}; pub(crate) use rng::DeterminismLog; pub use rng::{GlobalRng, Rng}; diff --git a/crates/runtime/src/sim/rng.rs b/crates/runtime/src/sim/rng.rs index 602eae59979..b39219290dd 100644 --- a/crates/runtime/src/sim/rng.rs +++ b/crates/runtime/src/sim/rng.rs @@ -9,7 +9,7 @@ pub type Rng = GlobalRng; /// The simulator owns one runtime-wide RNG handle and uses it for scheduler /// choices, probabilistic fault injection, and determinism checks. Hosted /// conveniences such as thread-local current-RNG access and libc random hooks -/// live in `adapter::sim_std`, not here. +/// live in `crate::sim_std`, not here. 
#[derive(Clone, Debug)] pub struct GlobalRng { inner: Arc>, @@ -200,43 +200,3 @@ fn probability_sample(value: u64, probability: f64) -> bool { fn checksum(value: u64) -> u8 { value.to_ne_bytes().into_iter().fold(0, |acc, byte| acc ^ byte) } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn rng_log_check_accepts_same_sequence() { - let first = Rng::new(10); - first.enable_determinism_log(); - let first_values = (0..8).map(|_| first.next_u64()).collect::>(); - let log = first.take_determinism_log().unwrap(); - - let second = Rng::new(10); - second.enable_determinism_check(log); - let second_values = (0..8).map(|_| second.next_u64()).collect::>(); - second.finish_determinism_check().unwrap(); - - assert_eq!(first_values, second_values); - } - - #[test] - fn buggify_is_disabled_by_default() { - let rng = Rng::new(20); - for _ in 0..8 { - assert!(!rng.buggify()); - assert!(!rng.buggify_with_prob(1.0)); - } - } - - #[test] - fn buggify_obeys_enable_and_disable() { - let rng = Rng::new(21); - rng.enable_buggify(); - assert!(rng.is_buggify_enabled()); - assert!(rng.buggify_with_prob(1.0)); - rng.disable_buggify(); - assert!(!rng.is_buggify_enabled()); - assert!(!rng.buggify_with_prob(1.0)); - } -} diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index 6210675f638..ed559fa70d3 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -194,7 +194,7 @@ impl fmt::Display for TimeoutElapsed { } } -#[cfg(any(feature = "tokio", feature = "simulation-std"))] +#[cfg(any(feature = "tokio", feature = "simulation"))] impl std::error::Error for TimeoutElapsed {} #[cfg(test)] diff --git a/crates/runtime/src/sim_std.rs b/crates/runtime/src/sim_std.rs new file mode 100644 index 00000000000..5f936cc5aa4 --- /dev/null +++ b/crates/runtime/src/sim_std.rs @@ -0,0 +1,327 @@ +//! Std-hosted entry points for running the deterministic simulator in tests. +//! +//! The portable simulator lives in [`crate::sim`]. 
This module is deliberately +//! host-specific: it installs thread-local context while a simulation is +//! running, checks determinism by replaying a seed in fresh OS threads, and +//! intercepts a few libc calls so std code cannot silently escape determinism. + +use alloc::boxed::Box; +use core::{ + cell::{Cell, RefCell}, + future::Future, + ptr, +}; +use std::sync::OnceLock; + +use crate::sim; + +// Public entry points. + +/// Return the generic runtime facade for the current simulation thread. +/// +/// Prefer passing explicit [`sim::Handle`] values in simulation code. This is a +/// hosted convenience for code paths that already accept [`crate::Runtime`]. +pub fn simulation_current() -> crate::Runtime { + crate::Runtime::simulation(current_handle().expect("simulation runtime is not active on this thread")) +} + +/// Run a future to completion with std-hosted determinism guards installed. +/// +/// This wraps [`sim::Runtime::block_on`] and is the normal entry point for DST +/// tests that execute inside a hosted process. While the future runs, this +/// function exposes the current simulation handle, routes std randomness +/// through the simulation RNG, and marks the thread as inside simulation so OS +/// thread spawns can be rejected. +pub fn block_on(runtime: &mut sim::Runtime, future: F) -> F::Output { + let _handle_context = enter_handle_context(runtime.handle()); + let _system_thread_context = enter_simulation_thread(); + let _rng_context = enter_rng_context(runtime.rng()); + ensure_rng_hooks_linked(); + runtime.block_on(future) +} + +/// Return the current simulation handle if this thread is inside [`block_on`]. +/// +/// This is intentionally the only ambient context accessor. Time, buggify, and +/// task APIs should be reached through the returned handle or through explicit +/// handles passed by the caller. 
+pub fn current_handle() -> Option { + CURRENT_HANDLE.with(|handle| handle.borrow().clone()) +} + +/// Run the same future factory twice and assert that both runs consume the same +/// deterministic RNG/scheduler trace. +/// +/// Each pass runs on a fresh OS thread so thread-local std state is not shared +/// between the recording and replay passes. +pub fn check_determinism(seed: u64, make_future: M) -> F::Output +where + M: Fn() -> F + Clone + Send + 'static, + F: Future + 'static, + F::Output: Send + 'static, +{ + let first = make_future.clone(); + let log = std::thread::spawn(move || { + let mut runtime = sim::Runtime::new(seed); + runtime.enable_determinism_log(); + block_on(&mut runtime, first()); + runtime + .take_determinism_log() + .expect("determinism log should be enabled") + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap(); + + std::thread::spawn(move || { + let mut runtime = sim::Runtime::new(seed); + runtime.enable_determinism_check(log); + let output = block_on(&mut runtime, make_future()); + runtime.finish_determinism_check().unwrap_or_else(|err| panic!("{err}")); + output + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap() +} + +fn panic_with_seed(seed: u64, payload: Box) -> ! { + eprintln!("note: run with --seed {seed} to reproduce this error"); + std::panic::resume_unwind(payload); +} + +// Simulation thread context. + +// Ambient state used only while `sim_std::block_on` is driving a simulation. +// +// The simulator itself stays explicit-handle based. These thread-locals exist +// because std and libc hooks do not accept a `sim::Handle` parameter, and +// because a few hosted test helpers need a current runtime while executing on +// the simulation thread. +thread_local! { + // Lets hosted glue recover the active runtime handle without passing it + // through every call. This should stay a convenience, not the primary API. 
+ static CURRENT_HANDLE: RefCell> = const { RefCell::new(None) }; + // Feeds deterministic bytes to host randomness calls made during an active + // simulation. Every such request advances the runtime RNG trace. + static CURRENT_RNG: RefCell> = const { RefCell::new(None) }; + // Marks the current OS thread as simulation-owned so thread creation hooks + // can reject accidental escapes to the host scheduler. + static IN_SIMULATION: Cell = const { Cell::new(false) }; +} + +struct CurrentHandleGuard { + previous: Option, +} + +struct CurrentRngGuard { + previous: Option, +} + +struct SimulationThreadGuard { + previous: bool, +} + +fn enter_handle_context(handle: sim::Handle) -> CurrentHandleGuard { + let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); + CurrentHandleGuard { previous } +} + +fn enter_simulation_thread() -> SimulationThreadGuard { + let previous = IN_SIMULATION.with(|state| state.replace(true)); + SimulationThreadGuard { previous } +} + +fn enter_rng_context(rng: sim::GlobalRng) -> CurrentRngGuard { + let previous = CURRENT_RNG.with(|current| current.replace(Some(rng))); + CurrentRngGuard { previous } +} + +fn in_simulation() -> bool { + IN_SIMULATION.with(Cell::get) +} + +impl Drop for CurrentHandleGuard { + fn drop(&mut self) { + CURRENT_HANDLE.with(|slot| { + *slot.borrow_mut() = self.previous.take(); + }); + } +} + +impl Drop for CurrentRngGuard { + fn drop(&mut self) { + CURRENT_RNG.with(|current| { + current.replace(self.previous.take()); + }); + } +} + +impl Drop for SimulationThreadGuard { + fn drop(&mut self) { + IN_SIMULATION.with(|state| { + state.set(self.previous); + }); + } +} + +// Randomness hook helpers. + +// Make sure our exported random hook is present in the final test binary. +// +// Some platforms only resolve getrandom/getentropy lazily. Calling it with a +// zero-length buffer is a no-op for behavior, but forces the symbol path to be +// linked before simulation code starts depending on it. 
+fn ensure_rng_hooks_linked() { + unsafe { + // Force the local getentropy symbol to be linked even if the host std + // library does not call it during this particular test. + getentropy(ptr::null_mut(), 0); + } +} + +// Fill bytes from the current runtime RNG when host code asks for randomness +// during an active simulation. +// +// This is the intentional deterministic substitute for OS randomness. If no +// simulation RNG is installed, the caller is outside `sim_std::block_on` and +// the libc hook should warn before delegating to the host OS. +fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { + CURRENT_RNG.with(|current| { + let Some(rng) = current.borrow().clone() else { + return false; + }; + if buflen == 0 { + return true; + } + let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; + rng.fill_bytes(buf); + true + }) +} + +// Thread hook. + +// Hook Unix thread creation by interposing `pthread_attr_init`. +// +// `std::thread::Builder::spawn` initializes pthread attributes before creating +// the thread. Returning an error here while simulation is active makes hidden +// OS thread creation fail early, before host scheduling can affect replay. +// Outside simulation, this delegates to the real libc symbol through `RTLD_NEXT`. +#[cfg(unix)] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int { + // std::thread enters libc through pthread_attr_init on Unix. Refusing that + // call while in simulation keeps hidden OS scheduling out of DST. 
+ if in_simulation() { + eprintln!("attempt to spawn a system thread in simulation."); + eprintln!("note: use simulator tasks instead."); + return -1; + } + + type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int; + static PTHREAD_ATTR_INIT: OnceLock = OnceLock::new(); + let original = PTHREAD_ATTR_INIT.get_or_init(|| unsafe { + // `RTLD_NEXT` skips this interposed function and finds the libc + // implementation that would have been called without the simulator. + let ptr = libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original pthread_attr_init"); + std::mem::transmute(ptr) + }); + unsafe { original(attr) } +} + +// Randomness syscall hooks. + +// Hook OS randomness by interposing `getrandom`. +// +// Code running inside simulation consumes bytes from the runtime RNG. Code +// outside simulation warns and falls back to host randomness so hosted test +// code continues to work. +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { + if fill_from_current_rng(buf, buflen) { + // Randomness requested while a simulation is active is deterministic + // and advances the runtime RNG trace. 
+ return buflen as isize; + } + + eprintln!("warning: randomness requested outside simulation; delegating to host OS"); + unsafe { real_getrandom()(buf, buflen, flags) } +} + +#[cfg(target_os = "linux")] +fn real_getrandom() -> unsafe extern "C" fn(*mut u8, usize, u32) -> isize { + type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize; + static GETRANDOM: OnceLock = OnceLock::new(); + *GETRANDOM.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getrandom"); + std::mem::transmute(ptr) + }) +} + +#[cfg(not(target_os = "linux"))] +fn real_getrandom() -> unsafe extern "C" fn(*mut u8, usize, u32) -> isize { + compile_error!("unsupported OS for DST getrandom override"); +} + +// Hook `getentropy` and route it through the same deterministic path as +// `getrandom`. +// +// The 256-byte limit is part of the getentropy contract. Keeping this wrapper +// small means all entropy decisions stay centralized in `getrandom`. 
+#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { + if buflen > 256 { + return -1; + } + match unsafe { getrandom(buf, buflen, 0) } { + -1 => -1, + _ => 0, + } +} + +#[cfg(test)] +mod tests { + use crate::sim; + + use super::{enter_rng_context, getentropy}; + + #[test] + #[cfg(unix)] + fn runtime_forbids_system_thread_spawn() { + let mut runtime = sim::Runtime::new(200); + super::block_on(&mut runtime, async { + let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); + assert!(result.is_err()); + }); + } + + #[test] + fn getentropy_uses_current_sim_rng() { + let rng = sim::GlobalRng::new(20); + let _guard = enter_rng_context(rng.clone()); + + let mut actual = [0u8; 24]; + unsafe { + assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); + } + + let expected_rng = sim::GlobalRng::new(20); + let mut expected = [0u8; 24]; + expected_rng.fill_bytes(&mut expected); + assert_eq!(actual, expected); + } + + #[test] + fn getentropy_delegates_to_host_randomness_outside_simulation() { + let mut actual = [0u8; 24]; + unsafe { + assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); + } + } +} diff --git a/crates/runtime/tests/sim_e2e.rs b/crates/runtime/tests/sim_e2e.rs index 18d45b894a2..dea5dda6555 100644 --- a/crates/runtime/tests/sim_e2e.rs +++ b/crates/runtime/tests/sim_e2e.rs @@ -2,11 +2,18 @@ use std::{sync::Arc, time::Duration}; +use futures::{ + channel::{mpsc, oneshot}, + StreamExt, +}; use spacetimedb_runtime::sim::{buggify, Rng, Runtime}; use spin::Mutex; #[test] fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { + // Exercises the executor, node pause/resume, and timer wheel together: + // paused node work must not run until resumed, and all nodes must observe + // one shared virtual clock. 
let mut runtime = Runtime::new(101); let handle = runtime.handle(); let node_a = runtime.create_node(); @@ -52,8 +59,91 @@ fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { assert_eq!(runtime.elapsed(), Duration::from_millis(3)); } +#[test] +fn client_server_request_response_uses_virtual_time() { + // Models a small client/server exchange without real networking: the client + // sends requests over an in-memory channel, and the server replies after + // deterministic virtual latency on a different simulated node. + #[derive(Clone, Copy, Debug, Eq, PartialEq)] + struct Response { + id: u64, + value: u64, + at: Duration, + } + + struct Request { + id: u64, + input: u64, + respond_to: oneshot::Sender, + } + + let mut runtime = Runtime::new(404); + let handle = runtime.handle(); + let client_node = runtime.create_node(); + let server_node = runtime.create_node(); + let (request_tx, mut request_rx) = mpsc::unbounded::(); + + let responses = runtime.block_on(async move { + let server_handle = handle.clone(); + let server = handle.spawn_on(server_node, async move { + for _ in 0..3 { + let request = request_rx.next().await.expect("client should send request"); + server_handle.sleep(Duration::from_millis(request.id + 1)).await; + request + .respond_to + .send(Response { + id: request.id, + value: request.input * 10, + at: server_handle.now(), + }) + .expect("client should wait for response"); + } + }); + + let client = handle.spawn_on(client_node, async move { + let mut responses = Vec::new(); + for (id, input) in [(2, 7), (0, 4), (1, 5)] { + let (respond_to, response_rx) = oneshot::channel(); + request_tx + .unbounded_send(Request { id, input, respond_to }) + .expect("server inbox should be open"); + responses.push(response_rx.await.expect("server should reply")); + } + responses + }); + + let responses = client.await; + server.await; + responses + }); + + assert_eq!( + responses, + vec![ + Response { + id: 2, + value: 70, + at: Duration::from_millis(3) + 
}, + Response { + id: 0, + value: 40, + at: Duration::from_millis(4) + }, + Response { + id: 1, + value: 50, + at: Duration::from_millis(6) + }, + ] + ); + assert_eq!(runtime.elapsed(), Duration::from_millis(6)); +} + #[test] fn runtime_buggify_matches_standalone_rng_sequence() { + // Checks that runtime-owned buggify decisions consume the same seeded RNG + // sequence as an explicit `Rng`, making injected faults replayable by seed. let seed = 77; let runtime = Runtime::new(seed); let expected = Rng::new(seed); @@ -76,6 +166,9 @@ fn runtime_buggify_matches_standalone_rng_sequence() { #[test] fn multi_node_timeout_uses_shared_virtual_clock() { + // Verifies timeout races are driven by virtual time, not wall time: the + // fast node completes at 2ms, then the slow node times out at the shared + // 4ms deadline. let mut runtime = Runtime::new(303); let handle = runtime.handle(); let slow_node = runtime.create_node(); From 9789d707fe7c2a3652fad34ee9c09f31e1968555 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 17:02:23 +0530 Subject: [PATCH 53/74] more polishing --- crates/runtime/README.md | 55 ++++- crates/runtime/src/lib.rs | 14 +- crates/runtime/src/sim/executor.rs | 222 ++++++++++++++---- crates/runtime/src/sim/mod.rs | 4 +- crates/runtime/src/sim/time/mod.rs | 4 +- crates/runtime/src/sim_std.rs | 143 +----------- crates/runtime/tests/sim_e2e.rs | 353 ++++++++++++++++++++--------- 7 files changed, 493 insertions(+), 302 deletions(-) diff --git a/crates/runtime/README.md b/crates/runtime/README.md index d0443dc3cd9..41af4cee621 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -33,28 +33,65 @@ toward `no_std + alloc` over time. This includes: - `time`: virtual clock, sleeps, and timeouts. - `rng`: seeded deterministic randomness for scheduler and workload decisions. - `buggify`: seeded fault-injection decisions. -- `config`: runtime seed and simulator configuration. 
+- `node`: node builders and node-local scheduling handles. [src/sim_std.rs](./src/sim_std.rs) contains hosted glue around simulator: -- `block_on` installs thread-local simulation context for hosted tests. +- `block_on` installs hosted simulation guards for tests. - `check_determinism` replays same seeded workload twice and compares trace. -- libc randomness hooks route entropy requests to runtime RNG while simulation - is active, and warn before delegating to host OS outside simulation. +- libc randomness hooks warn and delegate if code reaches host entropy. - Unix thread hooks reject accidental `std::thread::spawn` while simulation is active. Tokio integration is intentionally small and lives directly in [src/lib.rs](./src/lib.rs). -The crate is intentionally hybrid because SpacetimeDB is hybrid. Host-facing -systems such as networking, subscriptions, wasm host glue, auth, process -metrics, and CLI code may continue to use hosted infrastructure. Deep-core and -DST-facing paths should instead depend on `Runtime` or narrower -domain-specific traits passed in by the caller. Feature flags: - `tokio`: enables hosted runtime backend and remains in default feature set. - `simulation`: enables deterministic simulation runtime and hosted `sim_std` helpers. + +## Scope and Limitations + +- **Single-threaded runtime.** The simulator exposes interleaving and timeout + bugs, but not bugs that require true parallel execution. The direction is to + keep deep-core code single-threaded or close to thread-per-core; simulating + real parallelism is not planned here. + +- **Nodes are not full processes.** Nodes are separate scheduling domains, but + they still run on one executor. Stronger process boundaries should be + modeled by higher-level DST harnesses. + +- **One shared virtual clock.** Nodes share one clock, so the runtime cannot + model skew or drift. If a test needs mismatched clocks, the harness should + model that above this crate. 
+ +- **No built-in network, storage, or I/O simulation.** This crate provides + deterministic execution primitives only. Higher-level harnesses should model + message delivery, disk behavior, and failures. + +- **Not a Tokio replacement.** This crate does not aim to simulate APIs like + `tokio::net` or `tokio::fs`. Code that depends on them needs a higher-level + abstraction boundary. + +- **`spawn_blocking` is only a facade on simulation.** On the simulation + backend it currently delegates to a normal spawned task, so the closure + still runs on the single executor thread and can block runtime progress. The + direction is to avoid relying on blocking-pool semantics in simulated deep + core paths. + +- **Host randomness is not controlled.** `sim_std` warns and delegates if code + reaches OS entropy. The direction is to keep deep-core code and DST + harnesses off host randomness entirely. + +- **Not fully `no_std` or allocation-controlled yet.** The simulation core is + written with a `no_std + alloc` direction in mind, so moving its core + further in that direction should be straightforward. Today, though, hosted + glue still depends on `std`, and the runtime still allocates through normal + Rust container and task paths. Tight control over heap allocation is a + direction, not something this crate enforces yet. + +- **`NodeId` still coexists with `Node`.** The direction is to move callers + toward `Node` and reduce raw `NodeId` use over time. 
diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 90345998df5..429168bc6f9 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -245,10 +245,6 @@ impl Runtime { pub fn simulation(handle: sim::Handle) -> Self { Self::Simulation(handle) } - - pub fn simulation_current() -> Self { - sim_std::simulation_current() - } } impl Runtime { @@ -284,8 +280,16 @@ impl Runtime { Ok(panic_payload) => std::panic::resume_unwind(panic_payload), Err(e) => panic!("Unexpected JoinError: {e}"), }), + // This is only a facade placeholder for simulation today. It + // delegates to a normal simulated task, so the closure still runs + // on the single executor thread and can block overall runtime + // progress. Callers should not expect blocking-pool semantics on + // the simulation backend. #[cfg(feature = "simulation")] - Self::Simulation(handle) => handle.spawn_on(sim::NodeId::MAIN, async move { f() }).await, + Self::Simulation(handle) => handle + .spawn_on(sim::NodeId::MAIN, async move { f() }) + .await + .expect("simulation spawn_blocking task should not be cancelled"), #[cfg(not(any(feature = "tokio", feature = "simulation")))] _ => unreachable!("runtime dispatch has no enabled backend"), } diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index 04abae27020..0c5dd0754ae 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -49,6 +49,79 @@ impl fmt::Display for NodeId { } } +/// Immutable metadata attached to one simulated node. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +struct NodeConfig { + name: Option, +} + +/// Builder for configuring a simulated node before it is created. +pub struct NodeBuilder { + handle: Handle, + config: NodeConfig, +} + +impl NodeBuilder { + /// Assign a human-readable name to the node. 
+ pub fn name(mut self, name: impl Into) -> Self { + self.config.name = Some(name.into()); + self + } + + /// Create the node with the accumulated configuration. + pub fn build(self) -> Node { + self.handle.build_node(self.config) + } +} + +/// Handle to one simulated node in the runtime. +#[derive(Clone)] +pub struct Node { + id: NodeId, + handle: Handle, + config: Arc, +} + +impl Node { + /// Return the stable identifier for this simulated node. + pub fn id(&self) -> NodeId { + self.id + } + + /// Return the optional human-readable name for this node. + pub fn name(&self) -> Option<&str> { + self.config.name.as_deref() + } + + /// Pause scheduling for this node. + pub fn pause(&self) { + self.handle.pause(self.id); + } + + /// Resume scheduling for this node. + pub fn resume(&self) { + self.handle.resume(self.id); + } + + /// Spawn a `Send` future onto this simulated node. + pub fn spawn(&self, future: F) -> JoinHandle + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.handle.spawn_on(self.id, future) + } + + /// Spawn a non-`Send` future onto this simulated node. + pub fn spawn_local(&self, future: F) -> JoinHandle + where + F: Future + 'static, + F::Output: 'static, + { + self.handle.spawn_local_on(self.id, future) + } +} + /// A small single-threaded runtime for DST's top-level future. /// /// futures are scheduled as runnables, the ready queue @@ -95,7 +168,7 @@ impl Runtime { /// /// Nodes are a scheduling/pausing boundary rather than separate executors: /// all nodes still run on the same single-threaded runtime. - pub fn create_node(&self) -> NodeId { + pub fn create_node(&self) -> NodeBuilder { self.handle().create_node() } @@ -164,11 +237,6 @@ impl Runtime { pub(crate) fn finish_determinism_check(&self) -> Result<(), alloc::string::String> { self.executor.rng.finish_determinism_check() } - - #[allow(dead_code)] - pub(crate) fn rng(&self) -> Rng { - self.executor.rng.clone() - } } /// Cloneable access to the simulation executor. 
@@ -179,8 +247,21 @@ pub struct Handle { impl Handle { /// Create a new simulated node owned by this runtime. - pub fn create_node(&self) -> NodeId { - self.executor.create_node() + pub fn create_node(&self) -> NodeBuilder { + NodeBuilder { + handle: self.clone(), + config: NodeConfig::default(), + } + } + + fn build_node(&self, config: NodeConfig) -> Node { + let id = self.executor.create_node(config.clone()); + let config = self.executor.node_config(id); + Node { + id, + handle: self.clone(), + config, + } } /// Pause scheduling for a node. @@ -286,14 +367,10 @@ impl JoinHandle { } impl Future for JoinHandle { - type Output = T; + type Output = Result; fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - match self.as_mut().poll_join(cx) { - Poll::Ready(Ok(output)) => Poll::Ready(output), - Poll::Ready(Err(err)) => panic!("sim task: {err}"), - Poll::Pending => Poll::Pending, - } + self.as_mut().poll_join(cx) } } @@ -373,7 +450,7 @@ impl Future for Abortable { struct Executor { queue: Receiver, sender: Sender, - nodes: spin::Mutex>>, + nodes: spin::Mutex>>, next_node: AtomicU64, rng: Rng, time: TimeHandle, @@ -384,7 +461,7 @@ impl Executor { fn new(config: RuntimeConfig) -> Self { let queue = Queue::new(); let mut nodes = BTreeMap::new(); - nodes.insert(NodeId::MAIN, Arc::new(NodeState::default())); + nodes.insert(NodeId::MAIN, Arc::new(NodeRecord::default())); Self { queue: queue.receiver(), sender: queue.sender(), @@ -419,23 +496,33 @@ impl Executor { self.rng.buggify_with_prob(probability) } - fn create_node(&self) -> NodeId { + fn create_node(&self, config: NodeConfig) -> NodeId { let id = NodeId(self.next_node.fetch_add(1, Ordering::Relaxed)); - self.nodes.lock().insert(id, Arc::new(NodeState::default())); + self.nodes.lock().insert( + id, + Arc::new(NodeRecord { + config: Arc::new(config), + state: NodeState::default(), + }), + ); id } + fn node_config(&self, node: NodeId) -> Arc { + self.node_record(node).config.clone() + } + /// Mark a node 
as paused so newly selected runnables are buffered. fn pause(&self, node: NodeId) { - self.node_state(node).paused.store(true, Ordering::Relaxed); + self.node_record(node).state.paused.store(true, Ordering::Relaxed); } /// Mark a node as runnable again and requeue any buffered tasks for it. fn resume(&self, node: NodeId) { - let state = self.node_state(node); - state.paused.store(false, Ordering::Relaxed); + let record = self.node_record(node); + record.state.paused.store(false, Ordering::Relaxed); - let mut paused = state.paused_queue.lock(); + let mut paused = record.state.paused_queue.lock(); for runnable in paused.drain(..) { self.sender.send(runnable); } @@ -447,7 +534,7 @@ impl Executor { F: Future + Send + 'static, F::Output: Send + 'static, { - self.node_state(node); + self.assert_known_node(node); let abort = AbortHandle { state: Arc::new(AbortState::new()), @@ -468,7 +555,7 @@ impl Executor { F: Future + 'static, F::Output: 'static, { - self.node_state(node); + self.assert_known_node(node); let abort = AbortHandle { state: Arc::new(AbortState::new()), @@ -521,23 +608,35 @@ impl Executor { fn run_all_ready(&self) { while let Some(runnable) = self.queue.try_recv_random(&self.rng) { let node = *runnable.metadata(); - let state = self.node_state(node); - if state.paused.load(Ordering::Relaxed) { - state.paused_queue.lock().push(runnable); + let record = self.node_record(node); + if record.state.paused.load(Ordering::Relaxed) { + record.state.paused_queue.lock().push(runnable); continue; } + // TODO: Do some time advance here too runnable.run(); } } - /// Look up the scheduling state for a node, panicking if the node is unknown. - fn node_state(&self, node: NodeId) -> Arc { + /// Look up the record for a node, panicking if the node is unknown. 
+ fn node_record(&self, node: NodeId) -> Arc { self.nodes .lock() .get(&node) .cloned() .unwrap_or_else(|| panic!("unknown simulated node {node}")) } + + fn assert_known_node(&self, node: NodeId) { + let _ = self.node_record(node); + } +} + +/// One simulated node's immutable metadata plus scheduler state. +#[derive(Clone, Default)] +struct NodeRecord { + config: Arc, + state: NodeState, } /// Per-node scheduler state shared by tasks assigned to that node. @@ -575,6 +674,7 @@ impl Future for YieldNow { } /// Shared runnable queue used by the simulation executor. +/// TODO: Make it generic over T struct Queue { inner: Arc, } @@ -650,12 +750,12 @@ mod tests { #[test] fn paused_node_does_not_run_until_resumed() { let mut runtime = Runtime::new(1); - let node = runtime.create_node(); - runtime.pause(node); + let node = runtime.create_node().name("paused").build(); + node.pause(); let runs = Arc::new(AtomicUsize::new(0)); let task_runs = Arc::clone(&runs); - let task = runtime.spawn_on(node, async move { + let task = node.spawn(async move { task_runs.fetch_add(1, Ordering::SeqCst); 7 }); @@ -665,8 +765,8 @@ mod tests { }); assert_eq!(runs.load(Ordering::SeqCst), 0); - runtime.resume(node); - assert_eq!(runtime.block_on(task), 7); + node.resume(); + assert_eq!(runtime.block_on(task).expect("paused task should complete"), 7); assert_eq!(runs.load(Ordering::SeqCst), 1); } @@ -676,8 +776,8 @@ mod tests { let handle = runtime.handle(); let value = runtime.block_on(async move { - let node = handle.create_node(); - handle.spawn_on(node, async { 11 }).await + let node = handle.create_node().name("spawned").build(); + node.spawn(async { 11 }).await.expect("spawned task should complete") }); assert_eq!(value, 11); @@ -713,26 +813,50 @@ mod tests { assert!(!runtime.is_buggify_enabled()); } - #[cfg(feature = "simulation")] #[test] - fn current_handle_can_spawn_local_task_inside_runtime() { - assert!(crate::sim_std::current_handle().is_none()); + fn 
aborted_task_returns_join_error_when_awaited() { + let mut runtime = Runtime::new(8); + let node = runtime.create_node().name("abort").build(); + let task = node.spawn(async move { + yield_now().await; + 99 + }); + task.abort_handle().abort(); + + let err = runtime + .block_on(task) + .expect_err("aborted task should surface JoinError instead of panicking"); + assert_eq!(err, JoinError); + } + #[cfg(feature = "simulation")] + #[test] + fn sim_std_block_on_can_spawn_local_task_with_explicit_handle() { let mut runtime = Runtime::new(5); - let value = crate::sim_std::block_on(&mut runtime, async { - let handle = crate::sim_std::current_handle().expect("sim handle should be present inside block_on"); - let node = handle.create_node(); + let handle = runtime.handle(); + let node = handle.create_node().name("local").build(); + let value = crate::sim_std::block_on(&mut runtime, async move { let captured = std::rc::Rc::new(17); - handle - .spawn_local_on(node, async move { - yield_now().await; - *captured - }) - .await + node.spawn_local(async move { + yield_now().await; + *captured + }) + .await + .expect("spawned local task should complete") }); assert_eq!(value, 17); - assert!(crate::sim_std::current_handle().is_none()); + } + + #[test] + fn node_builder_sets_name() { + let runtime = Runtime::new(9); + let unnamed = runtime.create_node().build(); + let named = runtime.create_node().name("replica-1").build(); + + assert_eq!(unnamed.name(), None); + assert_eq!(named.name(), Some("replica-1")); + assert_ne!(unnamed.id(), named.id()); } #[cfg(feature = "simulation")] diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs index 4a87c3ef7ac..e44e2ac6707 100644 --- a/crates/runtime/src/sim/mod.rs +++ b/crates/runtime/src/sim/mod.rs @@ -9,6 +9,8 @@ mod executor; mod rng; pub mod time; -pub use executor::{yield_now, AbortHandle, Handle, JoinError, JoinHandle, NodeId, Runtime, RuntimeConfig}; +pub use executor::{ + yield_now, AbortHandle, Handle, JoinError, 
JoinHandle, Node, NodeBuilder, NodeId, Runtime, RuntimeConfig, +}; pub(crate) use rng::DeterminismLog; pub use rng::{GlobalRng, Rng}; diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index ed559fa70d3..65a1663429b 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -239,8 +239,8 @@ mod tests { fast_order.lock().push(3); }); - fast.await; - slow.await; + fast.await.expect("fast timer task should complete"); + slow.await.expect("slow timer task should complete"); } }); diff --git a/crates/runtime/src/sim_std.rs b/crates/runtime/src/sim_std.rs index 5f936cc5aa4..cdb425a2086 100644 --- a/crates/runtime/src/sim_std.rs +++ b/crates/runtime/src/sim_std.rs @@ -6,49 +6,23 @@ //! intercepts a few libc calls so std code cannot silently escape determinism. use alloc::boxed::Box; -use core::{ - cell::{Cell, RefCell}, - future::Future, - ptr, -}; +use core::{cell::Cell, future::Future}; use std::sync::OnceLock; use crate::sim; // Public entry points. -/// Return the generic runtime facade for the current simulation thread. -/// -/// Prefer passing explicit [`sim::Handle`] values in simulation code. This is a -/// hosted convenience for code paths that already accept [`crate::Runtime`]. -pub fn simulation_current() -> crate::Runtime { - crate::Runtime::simulation(current_handle().expect("simulation runtime is not active on this thread")) -} - /// Run a future to completion with std-hosted determinism guards installed. /// /// This wraps [`sim::Runtime::block_on`] and is the normal entry point for DST /// tests that execute inside a hosted process. While the future runs, this -/// function exposes the current simulation handle, routes std randomness -/// through the simulation RNG, and marks the thread as inside simulation so OS -/// thread spawns can be rejected. +/// marks the thread as inside simulation so OS thread spawns can be rejected. 
pub fn block_on(runtime: &mut sim::Runtime, future: F) -> F::Output { - let _handle_context = enter_handle_context(runtime.handle()); let _system_thread_context = enter_simulation_thread(); - let _rng_context = enter_rng_context(runtime.rng()); - ensure_rng_hooks_linked(); runtime.block_on(future) } -/// Return the current simulation handle if this thread is inside [`block_on`]. -/// -/// This is intentionally the only ambient context accessor. Time, buggify, and -/// task APIs should be reached through the returned handle or through explicit -/// handles passed by the caller. -pub fn current_handle() -> Option { - CURRENT_HANDLE.with(|handle| handle.borrow().clone()) -} - /// Run the same future factory twice and assert that both runs consume the same /// deterministic RNG/scheduler trace. /// @@ -94,69 +68,28 @@ fn panic_with_seed(seed: u64, payload: Box) -> ! { // Ambient state used only while `sim_std::block_on` is driving a simulation. // -// The simulator itself stays explicit-handle based. These thread-locals exist -// because std and libc hooks do not accept a `sim::Handle` parameter, and -// because a few hosted test helpers need a current runtime while executing on -// the simulation thread. +// The simulator itself stays explicit-handle based. This thread-local only +// marks whether the current OS thread is owned by a running simulation so +// host thread creation can be rejected. thread_local! { - // Lets hosted glue recover the active runtime handle without passing it - // through every call. This should stay a convenience, not the primary API. - static CURRENT_HANDLE: RefCell> = const { RefCell::new(None) }; - // Feeds deterministic bytes to host randomness calls made during an active - // simulation. Every such request advances the runtime RNG trace. - static CURRENT_RNG: RefCell> = const { RefCell::new(None) }; // Marks the current OS thread as simulation-owned so thread creation hooks // can reject accidental escapes to the host scheduler. 
static IN_SIMULATION: Cell = const { Cell::new(false) }; } -struct CurrentHandleGuard { - previous: Option, -} - -struct CurrentRngGuard { - previous: Option, -} - struct SimulationThreadGuard { previous: bool, } -fn enter_handle_context(handle: sim::Handle) -> CurrentHandleGuard { - let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); - CurrentHandleGuard { previous } -} - fn enter_simulation_thread() -> SimulationThreadGuard { let previous = IN_SIMULATION.with(|state| state.replace(true)); SimulationThreadGuard { previous } } -fn enter_rng_context(rng: sim::GlobalRng) -> CurrentRngGuard { - let previous = CURRENT_RNG.with(|current| current.replace(Some(rng))); - CurrentRngGuard { previous } -} - fn in_simulation() -> bool { IN_SIMULATION.with(Cell::get) } -impl Drop for CurrentHandleGuard { - fn drop(&mut self) { - CURRENT_HANDLE.with(|slot| { - *slot.borrow_mut() = self.previous.take(); - }); - } -} - -impl Drop for CurrentRngGuard { - fn drop(&mut self) { - CURRENT_RNG.with(|current| { - current.replace(self.previous.take()); - }); - } -} - impl Drop for SimulationThreadGuard { fn drop(&mut self) { IN_SIMULATION.with(|state| { @@ -165,41 +98,6 @@ impl Drop for SimulationThreadGuard { } } -// Randomness hook helpers. - -// Make sure our exported random hook is present in the final test binary. -// -// Some platforms only resolve getrandom/getentropy lazily. Calling it with a -// zero-length buffer is a no-op for behavior, but forces the symbol path to be -// linked before simulation code starts depending on it. -fn ensure_rng_hooks_linked() { - unsafe { - // Force the local getentropy symbol to be linked even if the host std - // library does not call it during this particular test. - getentropy(ptr::null_mut(), 0); - } -} - -// Fill bytes from the current runtime RNG when host code asks for randomness -// during an active simulation. -// -// This is the intentional deterministic substitute for OS randomness. 
If no -// simulation RNG is installed, the caller is outside `sim_std::block_on` and -// the libc hook should warn before delegating to the host OS. -fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { - CURRENT_RNG.with(|current| { - let Some(rng) = current.borrow().clone() else { - return false; - }; - if buflen == 0 { - return true; - } - let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; - rng.fill_bytes(buf); - true - }) -} - // Thread hook. // Hook Unix thread creation by interposing `pthread_attr_init`. @@ -236,19 +134,12 @@ unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc: // Hook OS randomness by interposing `getrandom`. // -// Code running inside simulation consumes bytes from the runtime RNG. Code -// outside simulation warns and falls back to host randomness so hosted test -// code continues to work. +// This crate no longer tries to make host randomness deterministic. Any such +// request is surfaced with a warning and then delegated to the host OS. #[unsafe(no_mangle)] #[inline(never)] unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { - if fill_from_current_rng(buf, buflen) { - // Randomness requested while a simulation is active is deterministic - // and advances the runtime RNG trace. 
- return buflen as isize; - } - - eprintln!("warning: randomness requested outside simulation; delegating to host OS"); + eprintln!("warning: randomness requested; delegating to host OS"); unsafe { real_getrandom()(buf, buflen, flags) } } @@ -289,7 +180,7 @@ unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { mod tests { use crate::sim; - use super::{enter_rng_context, getentropy}; + use super::getentropy; #[test] #[cfg(unix)] @@ -301,22 +192,6 @@ mod tests { }); } - #[test] - fn getentropy_uses_current_sim_rng() { - let rng = sim::GlobalRng::new(20); - let _guard = enter_rng_context(rng.clone()); - - let mut actual = [0u8; 24]; - unsafe { - assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); - } - - let expected_rng = sim::GlobalRng::new(20); - let mut expected = [0u8; 24]; - expected_rng.fill_bytes(&mut expected); - assert_eq!(actual, expected); - } - #[test] fn getentropy_delegates_to_host_randomness_outside_simulation() { let mut actual = [0u8; 24]; diff --git a/crates/runtime/tests/sim_e2e.rs b/crates/runtime/tests/sim_e2e.rs index dea5dda6555..3757ee187fa 100644 --- a/crates/runtime/tests/sim_e2e.rs +++ b/crates/runtime/tests/sim_e2e.rs @@ -9,25 +9,252 @@ use futures::{ use spacetimedb_runtime::sim::{buggify, Rng, Runtime}; use spin::Mutex; +/// One reply produced by the simulated server. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct Response { + id: u64, + value: u64, + at: Duration, +} + +/// Trace entries recorded by the server so tests can assert schedule/fault outcomes. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum ServerEvent { + Received { id: u64, at: Duration }, + Dropped { id: u64, at: Duration }, + Replied { id: u64, at: Duration }, +} + +/// A client request submitted to the simulated server. +struct Request { + id: u64, + input: u64, + respond_to: oneshot::Sender, +} + +/// Complete result of the client/server workload for one seed. 
+#[derive(Debug, Eq, PartialEq)] +struct ClientServerRun { + responses: Vec<(u64, Option)>, + server_events: Vec, + elapsed: Duration, +} + +/// Checks the "same seed, same trace" side of the client/server workload. +/// Both the client-visible results and the server-side event trace should stay +/// stable for one fixed seed. +#[test] +fn client_server_buggify_injects_deterministic_faults() { + let run = run_buggified_client_server(404); + + assert_eq!( + run.responses, + vec![ + (0, None), + ( + 1, + Some(Response { + id: 1, + value: 50, + at: Duration::from_millis(2), + }), + ), + ( + 2, + Some(Response { + id: 2, + value: 70, + at: Duration::from_millis(3), + }), + ), + (3, None), + ( + 4, + Some(Response { + id: 4, + value: 110, + at: Duration::from_millis(5), + }), + ), + ] + ); + assert_eq!( + run.server_events, + vec![ + ServerEvent::Received { + id: 3, + at: Duration::ZERO, + }, + ServerEvent::Received { + id: 0, + at: Duration::ZERO, + }, + ServerEvent::Received { + id: 2, + at: Duration::ZERO, + }, + ServerEvent::Received { + id: 4, + at: Duration::ZERO, + }, + ServerEvent::Received { + id: 1, + at: Duration::ZERO, + }, + ServerEvent::Dropped { + id: 0, + at: Duration::from_millis(1), + }, + ServerEvent::Replied { + id: 1, + at: Duration::from_millis(2), + }, + ServerEvent::Replied { + id: 2, + at: Duration::from_millis(3), + }, + ServerEvent::Dropped { + id: 3, + at: Duration::from_millis(4), + }, + ServerEvent::Replied { + id: 4, + at: Duration::from_millis(5), + }, + ] + ); + assert_eq!(run.elapsed, Duration::from_millis(5)); +} + +/// Checks the "different seed, different exploration" side of the same +/// client/server workload. The full run result should differ across seeds. 
+#[test] +fn client_server_buggify_differs_across_seeds() { + let seed_404 = run_buggified_client_server(404); + let seed_405 = run_buggified_client_server(405); + + eprintln!("seed 404: {seed_404:#?}"); + eprintln!("seed 405: {seed_405:#?}"); + assert_ne!(seed_404, seed_405); +} + +/// Fixed request set used by the client workload. +const CLIENT_REQUESTS: [(u64, u64); 5] = [(0, 4), (1, 5), (2, 7), (3, 9), (4, 11)]; + +/// Run a small concurrent client/server workload under one seed. +/// +/// The client submits every request from its own simulated task. The server +/// receives requests in scheduler order, then spawns one worker per request. +/// Each worker sleeps for deterministic virtual latency and may drop the reply +/// based on buggify. +fn run_buggified_client_server(seed: u64) -> ClientServerRun { + let mut runtime = Runtime::new(seed); + buggify::enable(&runtime); + let handle = runtime.handle(); + let client_node = runtime.create_node().name("client").build(); + let server_node = runtime.create_node().name("server").build(); + let (request_tx, mut request_rx) = mpsc::unbounded::(); + let server_events = Arc::new(Mutex::new(Vec::new())); + + let (responses, server_events) = runtime.block_on(async move { + let server_handle = handle.clone(); + let server_events_for_server = Arc::clone(&server_events); + let server = server_node.clone().spawn(async move { + let mut workers = Vec::new(); + for _ in 0..5 { + let request = request_rx.next().await.expect("client should send request"); + server_events_for_server.lock().push(ServerEvent::Received { + id: request.id, + at: server_handle.now(), + }); + + let worker_handle = server_handle.clone(); + let worker_events = Arc::clone(&server_events_for_server); + workers.push(server_node.clone().spawn(async move { + worker_handle.sleep(Duration::from_millis(request.id + 1)).await; + if worker_handle.buggify_with_prob(0.4) { + worker_events.lock().push(ServerEvent::Dropped { + id: request.id, + at: worker_handle.now(), 
+ }); + return; + } + + let response = Response { + id: request.id, + value: request.input * 10, + at: worker_handle.now(), + }; + worker_events.lock().push(ServerEvent::Replied { + id: request.id, + at: response.at, + }); + request + .respond_to + .send(response) + .expect("client should wait for response"); + })); + } + + for worker in workers { + worker.await.expect("server worker should complete"); + } + }); + + let client_outer_node = client_node.clone(); + let client = client_node.spawn(async move { + let mut requests = Vec::new(); + for (id, input) in CLIENT_REQUESTS { + let request_tx = request_tx.clone(); + let client_request_node = client_outer_node.clone(); + requests.push(client_request_node.spawn(async move { + let (respond_to, response_rx) = oneshot::channel(); + request_tx + .unbounded_send(Request { id, input, respond_to }) + .expect("server inbox should be open"); + (id, response_rx.await.ok()) + })); + } + drop(request_tx); + + let mut responses = Vec::new(); + for request in requests { + responses.push(request.await.expect("client request task should complete")); + } + responses + }); + + let responses = client.await.expect("client task should complete"); + server.await.expect("server task should complete"); + (responses, server_events.lock().clone()) + }); + + ClientServerRun { + responses, + server_events, + elapsed: runtime.elapsed(), + } +} + +/// Exercises the executor, node pause/resume, and timer wheel together: +/// paused node work must not run until resumed, and all nodes must observe +/// one shared virtual clock. #[test] fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { - // Exercises the executor, node pause/resume, and timer wheel together: - // paused node work must not run until resumed, and all nodes must observe - // one shared virtual clock. 
let mut runtime = Runtime::new(101); let handle = runtime.handle(); - let node_a = runtime.create_node(); - let node_b = runtime.create_node(); + let node_a = runtime.create_node().name("a").build(); + let node_b = runtime.create_node().name("b").build(); let events = Arc::new(Mutex::new(Vec::new())); - runtime.pause(node_b); + node_b.pause(); runtime.block_on({ let events = Arc::clone(&events); async move { let a_handle = handle.clone(); let a_events = Arc::clone(&events); - let a = handle.spawn_on(node_a, async move { + let a = node_a.spawn(async move { a_events.lock().push(("a_started", a_handle.now())); a_handle.sleep(Duration::from_millis(3)).await; a_events.lock().push(("a_finished", a_handle.now())); @@ -35,7 +262,7 @@ fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { let b_handle = handle.clone(); let b_events = Arc::clone(&events); - let b = handle.spawn_on(node_b, async move { + let b = node_b.spawn(async move { b_events.lock().push(("b_started", b_handle.now())); b_handle.sleep(Duration::from_millis(2)).await; b_events.lock().push(("b_finished", b_handle.now())); @@ -43,10 +270,10 @@ fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { handle.sleep(Duration::from_millis(1)).await; events.lock().push(("main_resumed_b", handle.now())); - handle.resume(node_b); + node_b.resume(); - a.await; - b.await; + a.await.expect("node a task should complete"); + b.await.expect("node b task should complete"); } }); @@ -59,91 +286,10 @@ fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { assert_eq!(runtime.elapsed(), Duration::from_millis(3)); } -#[test] -fn client_server_request_response_uses_virtual_time() { - // Models a small client/server exchange without real networking: the client - // sends requests over an in-memory channel, and the server replies after - // deterministic virtual latency on a different simulated node. 
- #[derive(Clone, Copy, Debug, Eq, PartialEq)] - struct Response { - id: u64, - value: u64, - at: Duration, - } - - struct Request { - id: u64, - input: u64, - respond_to: oneshot::Sender, - } - - let mut runtime = Runtime::new(404); - let handle = runtime.handle(); - let client_node = runtime.create_node(); - let server_node = runtime.create_node(); - let (request_tx, mut request_rx) = mpsc::unbounded::(); - - let responses = runtime.block_on(async move { - let server_handle = handle.clone(); - let server = handle.spawn_on(server_node, async move { - for _ in 0..3 { - let request = request_rx.next().await.expect("client should send request"); - server_handle.sleep(Duration::from_millis(request.id + 1)).await; - request - .respond_to - .send(Response { - id: request.id, - value: request.input * 10, - at: server_handle.now(), - }) - .expect("client should wait for response"); - } - }); - - let client = handle.spawn_on(client_node, async move { - let mut responses = Vec::new(); - for (id, input) in [(2, 7), (0, 4), (1, 5)] { - let (respond_to, response_rx) = oneshot::channel(); - request_tx - .unbounded_send(Request { id, input, respond_to }) - .expect("server inbox should be open"); - responses.push(response_rx.await.expect("server should reply")); - } - responses - }); - - let responses = client.await; - server.await; - responses - }); - - assert_eq!( - responses, - vec![ - Response { - id: 2, - value: 70, - at: Duration::from_millis(3) - }, - Response { - id: 0, - value: 40, - at: Duration::from_millis(4) - }, - Response { - id: 1, - value: 50, - at: Duration::from_millis(6) - }, - ] - ); - assert_eq!(runtime.elapsed(), Duration::from_millis(6)); -} - +/// Checks that runtime-owned buggify decisions consume the same seeded RNG +/// sequence as an explicit `Rng`, making injected faults replayable by seed. 
#[test] fn runtime_buggify_matches_standalone_rng_sequence() { - // Checks that runtime-owned buggify decisions consume the same seeded RNG - // sequence as an explicit `Rng`, making injected faults replayable by seed. let seed = 77; let runtime = Runtime::new(seed); let expected = Rng::new(seed); @@ -164,19 +310,19 @@ fn runtime_buggify_matches_standalone_rng_sequence() { assert!(!buggify::should_inject_fault_with_prob(&runtime, 1.0)); } +/// Verifies timeout races are driven by virtual time, not wall time: the fast +/// node completes at 2ms, then the slow node times out at the shared 4ms +/// deadline. #[test] fn multi_node_timeout_uses_shared_virtual_clock() { - // Verifies timeout races are driven by virtual time, not wall time: the - // fast node completes at 2ms, then the slow node times out at the shared - // 4ms deadline. let mut runtime = Runtime::new(303); let handle = runtime.handle(); - let slow_node = runtime.create_node(); - let fast_node = runtime.create_node(); + let slow_node = runtime.create_node().name("slow").build(); + let fast_node = runtime.create_node().name("fast").build(); let output = runtime.block_on(async move { let slow_handle = handle.clone(); - let slow = handle.spawn_on(slow_node, async move { + let slow = slow_node.spawn(async move { slow_handle .timeout(Duration::from_millis(4), async { slow_handle.sleep(Duration::from_millis(10)).await; @@ -186,12 +332,15 @@ fn multi_node_timeout_uses_shared_virtual_clock() { }); let fast_handle = handle.clone(); - let fast = handle.spawn_on(fast_node, async move { + let fast = fast_node.spawn(async move { fast_handle.sleep(Duration::from_millis(2)).await; ("fast-finished", fast_handle.now()) }); - (slow.await, fast.await) + ( + slow.await.expect("slow node task should complete"), + fast.await.expect("fast node task should complete"), + ) }); let (slow, fast) = output; From 8cd609cc44865332d74c36ac70c3945795e9b836 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 17:22:16 
+0530 Subject: [PATCH 54/74] update readme --- crates/runtime/README.md | 48 ++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/crates/runtime/README.md b/crates/runtime/README.md index 41af4cee621..a7b2fdefa71 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -4,13 +4,14 @@ under deterministic simulation testing (DST). DST runs code inside a deterministic simulator that controls nondeterministic -inputs instead of letting them come directly from host environment. Given same +inputs instead of letting them come directly from the OS and real runtime +environment. Given same seed, simulator should produce same trace. When it finds a bug, seed should be enough to reproduce that bug exactly. For this to work, code under test must not read clocks, randomness, scheduling, I/O, or network behavior directly from outer environment. Those -effects need interfaces that production can implement with hosted services and +effects need interfaces that production can implement with real runtime-backed services and DST can replace with simulated ones. This crate provides the execution-control part of that boundary: spawning, @@ -23,11 +24,13 @@ higher-level abstractions. [src/lib.rs](./src/lib.rs) exposes `Runtime`, small runtime handle shared code carries. It has two variants: -- `Runtime::Tokio(TokioHandle)` for hosted execution. +- `Runtime::Tokio(TokioHandle)` for real runtime execution. - `Runtime::Simulation(sim::Handle)` for deterministic simulation. [src/sim](./src/sim) contains simulation core. It is single-threaded and aims -toward `no_std + alloc` over time. This includes: +toward `no_std + alloc` over time. It is written with dependency reduction in +mind, though it still uses a small set of runtime-support crates today. +The module includes: - `executor`: single-threaded task scheduler with deterministic runnable selection. - `time`: virtual clock, sleeps, and timeouts. 
@@ -35,11 +38,11 @@ toward `no_std + alloc` over time. This includes: - `buggify`: seeded fault-injection decisions. - `node`: node builders and node-local scheduling handles. -[src/sim_std.rs](./src/sim_std.rs) contains hosted glue around simulator: +[src/sim_std.rs](./src/sim_std.rs) contains `std`/OS glue around the simulator: -- `block_on` installs hosted simulation guards for tests. +- `block_on` installs simulation guards for tests running in a normal process. - `check_determinism` replays same seeded workload twice and compares trace. -- libc randomness hooks warn and delegate if code reaches host entropy. +- libc randomness hooks warn and delegate if code reaches OS entropy. - Unix thread hooks reject accidental `std::thread::spawn` while simulation is active. @@ -49,8 +52,8 @@ Tokio integration is intentionally small and lives directly in Feature flags: -- `tokio`: enables hosted runtime backend and remains in default feature set. -- `simulation`: enables deterministic simulation runtime and hosted `sim_std` +- `tokio`: enables tokio runtime backend and remains in default feature set. +- `simulation`: enables deterministic simulation runtime and `sim_std` helpers. ## Scope and Limitations @@ -58,15 +61,10 @@ Feature flags: - **Single-threaded runtime.** The simulator exposes interleaving and timeout bugs, but not bugs that require true parallel execution. The direction is to keep deep-core code single-threaded or close to thread-per-core; simulating - real parallelism is not planned here. + real parallelism is out of scope. -- **Nodes are not full processes.** Nodes are separate scheduling domains, but - they still run on one executor. Stronger process boundaries should be - modeled by higher-level DST harnesses. - -- **One shared virtual clock.** Nodes share one clock, so the runtime cannot - model skew or drift. If a test needs mismatched clocks, the harness should - model that above this crate. 
+- **One shared virtual clock.** Nodes share one clock, so thir clock can not + drift to show mismatched time. - **No built-in network, storage, or I/O simulation.** This crate provides deterministic execution primitives only. Higher-level harnesses should model @@ -76,22 +74,18 @@ Feature flags: `tokio::net` or `tokio::fs`. Code that depends on them needs a higher-level abstraction boundary. -- **`spawn_blocking` is only a facade on simulation.** On the simulation +- **`spawn_blocking` is only a facade on simulation runtime.** On the simulation backend it currently delegates to a normal spawned task, so the closure still runs on the single executor thread and can block runtime progress. The - direction is to avoid relying on blocking-pool semantics in simulated deep - core paths. + direction is to avoid relying on blocking-pool semantics. -- **Host randomness is not controlled.** `sim_std` warns and delegates if code - reaches OS entropy. The direction is to keep deep-core code and DST - harnesses off host randomness entirely. +- **OS randomness is not controlled.** `sim_std` warns if code reaches OS + entropy. The direction is to keep application code and testing harnesses off + OS randomness entirely. - **Not fully `no_std` or allocation-controlled yet.** The simulation core is written with a `no_std + alloc` direction in mind, so moving its core - further in that direction should be straightforward. Today, though, hosted + further in that direction should be straightforward. Today, though, `std`/OS glue still depends on `std`, and the runtime still allocates through normal Rust container and task paths. Tight control over heap allocation is a direction, not something this crate enforces yet. - -- **`NodeId` still coexists with `Node`.** The direction is to move callers - toward `Node` and reduce raw `NodeId` use over time. 
From 730028f47765eb2f5d91a850f5036240f9dfa553 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 18:14:51 +0530 Subject: [PATCH 55/74] Runtime -> Handle --- crates/core/src/db/durability.rs | 4 ++-- crates/core/src/db/persistence.rs | 12 ++++++------ crates/core/src/db/relational_db.rs | 15 +++++++-------- crates/core/src/db/snapshot.rs | 10 +++++----- crates/durability/src/imp/local.rs | 16 ++++++++-------- crates/runtime/src/lib.rs | 20 ++++---------------- 6 files changed, 32 insertions(+), 45 deletions(-) diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index d712630a63a..f749f72850a 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -11,7 +11,7 @@ use spacetimedb_lib::Identity; use spacetimedb_sats::ProductValue; use crate::db::persistence::Durability; -use spacetimedb_runtime::Runtime; +use spacetimedb_runtime::Handle; pub(super) fn request_durability( durability: &Durability, @@ -32,7 +32,7 @@ pub(super) fn request_durability( })); } -pub(super) fn spawn_close(durability: Arc, runtime: &Runtime, database_identity: Identity) { +pub(super) fn spawn_close(durability: Arc, runtime: &Handle, database_identity: Identity) { let label = format!("[{database_identity}]"); let runtime = runtime.clone(); runtime.clone().spawn(async move { diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index dbd7e42c22c..ce3ef5d6841 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -7,7 +7,7 @@ use spacetimedb_paths::server::ServerDataDir; use spacetimedb_snapshot::DynSnapshotRepo; use crate::{messages::control_db::Database, util::asyncify}; -use spacetimedb_runtime::Runtime; +use spacetimedb_runtime::Handle; use super::{ relational_db::{self, Txdata}, @@ -43,7 +43,7 @@ pub struct Persistence { /// this type. pub snapshots: Option, /// Runtime onto which durability-related tasks shall be spawned. 
- pub runtime: Runtime, + pub runtime: Handle, } impl Persistence { @@ -54,14 +54,14 @@ impl Persistence { snapshots: Option, runtime: tokio::runtime::Handle, ) -> Self { - Self::new_with_runtime(durability, disk_size, snapshots, Runtime::tokio(runtime)) + Self::new_with_runtime(durability, disk_size, snapshots, Handle::tokio(runtime)) } pub fn new_with_runtime( durability: impl spacetimedb_durability::Durability + 'static, disk_size: impl Fn() -> io::Result + Send + Sync + 'static, snapshots: Option, - runtime: Runtime, + runtime: Handle, ) -> Self { Self { durability: Arc::new(durability), @@ -101,7 +101,7 @@ impl Persistence { Option>, Option, Option, - Option, + Option, ) { this.map( |Self { @@ -153,7 +153,7 @@ impl PersistenceProvider for LocalPersistenceProvider { async fn persistence(&self, database: &Database, replica_id: u64) -> anyhow::Result { let replica_dir = self.data_dir.replica(replica_id); let snapshot_dir = replica_dir.snapshots(); - let runtime = Runtime::tokio_current(); + let runtime = Handle::tokio_current(); let database_identity = database.database_identity; let snapshot_worker = diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index ae76eef632a..e72741577bb 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -44,7 +44,7 @@ use spacetimedb_lib::Identity; use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; use spacetimedb_primitives::*; -use spacetimedb_runtime::Runtime; +use spacetimedb_runtime::Handle; use spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::raw_identifier::RawIdentifier; use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductType, ProductValue}; @@ -102,7 +102,7 @@ pub struct RelationalDB { inner: Locking, durability: Option>, - durability_runtime: Option, + durability_runtime: Option, snapshot_worker: Option, row_count_fn: RowCountFn, @@ -136,7 +136,6 @@ impl 
std::fmt::Debug for RelationalDB { impl Drop for RelationalDB { fn drop(&mut self) { - log::info!("starting drop"); // Attempt to flush the outstanding transactions. if let (Some(durability), Some(runtime)) = (self.durability.take(), self.durability_runtime.take()) { spawn_durability_close(durability, &runtime, self.database_identity); @@ -1673,7 +1672,7 @@ pub type LocalDurability = Arc>; /// of the commitlog. pub async fn local_durability( replica_dir: ReplicaDir, - runtime: Runtime, + runtime: Handle, snapshot_worker: Option<&SnapshotWorker>, ) -> Result<(LocalDurability, DiskSizeFn), DBError> { let on_new_segment = snapshot_worker.map(|snapshot_worker| { @@ -1954,12 +1953,12 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), db_identity, replica_id).map(|repo| { - SnapshotWorker::new(repo, snapshot::Compression::Disabled, Runtime::tokio(rt.clone())) + SnapshotWorker::new(repo, snapshot::Compression::Disabled, Handle::tokio(rt.clone())) }) }) .transpose()?; - let runtime = Runtime::tokio(rt.clone()); + let runtime = Handle::tokio(rt.clone()); let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), runtime.clone(), snapshots.as_ref()))?; let history = local.as_history(); @@ -2082,11 +2081,11 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), Identity::ZERO, 0).map(|repo| { - SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio(rt.clone())) + SnapshotWorker::new(repo, snapshot::Compression::Enabled, Handle::tokio(rt.clone())) }) }) .transpose()?; - let runtime = Runtime::tokio(rt.clone()); + let runtime = Handle::tokio(rt.clone()); let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), runtime.clone(), snapshots.as_ref()))?; let history = local.as_history(); diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 63f582b68b4..c02e5f6246c 100644 --- 
a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -17,8 +17,8 @@ use spacetimedb_lib::Identity; use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; use tokio::sync::watch; -use crate::{db::snapshot, worker_metrics::WORKER_METRICS}; -use spacetimedb_runtime::Runtime; +use crate::worker_metrics::WORKER_METRICS; +use spacetimedb_runtime::Handle; pub type SnapshotDatabaseState = Arc>; @@ -70,7 +70,7 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. - pub fn new(snapshot_repo: Arc, compression: Compression, runtime: Runtime) -> Self { + pub fn new(snapshot_repo: Arc, compression: Compression, runtime: Handle) -> Self { let database = snapshot_repo.database_identity(); let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); @@ -172,7 +172,7 @@ struct SnapshotWorkerActor { snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, - runtime: Runtime, + runtime: Handle, compression: Option, } @@ -317,7 +317,7 @@ struct Compressor { snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, - runtime: Runtime, + runtime: Handle, } impl Compressor { diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 7a0c29760e1..e3eca56e5d9 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -19,7 +19,7 @@ use spacetimedb_commitlog::{ }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; -use spacetimedb_runtime::{JoinHandle, Runtime}; +use spacetimedb_runtime::{Handle, JoinHandle}; use thiserror::Error; use tokio::sync::watch; use tracing::{instrument, Span}; @@ -123,7 +123,7 @@ impl Local { /// This is used to capture a 
snapshot each new segment. pub fn open( replica_dir: ReplicaDir, - runtime: Runtime, + rt: Handle, opts: Options, on_new_segment: Option>, ) -> Result { @@ -138,7 +138,7 @@ impl Local { opts.commitlog, on_new_segment, )?); - Self::open_inner(clog, runtime, opts, Some(lock)) + Self::open_inner(clog, rt, opts, Some(lock)) } } @@ -148,7 +148,7 @@ where R: RepoWithoutLockFile + Send + Sync + 'static, { /// Create a [`Local`] instance backed by the provided commitlog repo. - pub fn open_with_repo(repo: R, rt: Runtime, opts: Options) -> Result { + pub fn open_with_repo(repo: R, rt: Handle, opts: Options) -> Result { info!("open local durability"); let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); Self::open_inner(clog, rt, opts, None) @@ -162,7 +162,7 @@ where { fn open_inner( clog: Arc, R>>, - runtime: Runtime, + rt: Handle, opts: Options, lock: Option, ) -> Result { @@ -170,13 +170,13 @@ where let (queue, txdata_rx) = async_channel::bounded(queue_capacity); let queue_depth = Arc::new(AtomicU64::new(0)); let (durable_tx, durable_rx) = watch::channel(clog.max_committed_offset()); - let actor = runtime.spawn( + let actor = rt.spawn( Actor { clog: clog.clone(), durable_offset: durable_tx, queue_depth: queue_depth.clone(), batch_capacity: opts.batch_capacity, - runtime: runtime.clone(), + runtime: rt.clone(), lock, } .run(txdata_rx), @@ -241,7 +241,7 @@ where queue_depth: Arc, batch_capacity: NonZeroUsize, - runtime: Runtime, + runtime: Handle, #[allow(unused)] lock: Option, diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 429168bc6f9..65e35c8793f 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -20,21 +20,9 @@ pub mod sim_std; #[cfg(feature = "tokio")] pub type TokioHandle = tokio::runtime::Handle; -#[cfg(feature = "tokio")] -pub type TokioRuntime = tokio::runtime::Runtime; - -#[cfg(feature = "tokio")] -pub fn current_handle_or_new_runtime() -> std::io::Result<(TokioHandle, Option)> { - if let Ok(handle) 
= TokioHandle::try_current() { - return Ok((handle, None)); - } - - let runtime = TokioRuntime::new()?; - Ok((runtime.handle().clone(), Some(runtime))) -} #[derive(Clone)] -pub enum Runtime { +pub enum Handle { #[cfg(feature = "tokio")] Tokio(TokioHandle), #[cfg(feature = "simulation")] @@ -230,7 +218,7 @@ impl fmt::Display for RuntimeTimeout { impl std::error::Error for RuntimeTimeout {} #[cfg(feature = "tokio")] -impl Runtime { +impl Handle { pub fn tokio(handle: TokioHandle) -> Self { Self::Tokio(handle) } @@ -241,13 +229,13 @@ impl Runtime { } #[cfg(feature = "simulation")] -impl Runtime { +impl Handle { pub fn simulation(handle: sim::Handle) -> Self { Self::Simulation(handle) } } -impl Runtime { +impl Handle { pub fn spawn(&self, future: impl Future + Send + 'static) -> JoinHandle<()> { #[cfg(not(any(feature = "tokio", feature = "simulation")))] let _ = future; From 35cbea9baaad81e83034d669ab697141358f9d8d Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 18:29:44 +0530 Subject: [PATCH 56/74] Apply suggestions from code review Co-authored-by: Shubham Mishra Signed-off-by: Shubham Mishra --- crates/commitlog/src/lib.rs | 4 ---- crates/core/src/db/relational_db.rs | 2 +- crates/core/src/db/snapshot.rs | 20 +++++++++---------- .../subscription/module_subscription_actor.rs | 2 +- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index 075de217ff9..d2f6f20cdd5 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -214,10 +214,6 @@ where } } -impl Commitlog -where - R: Repo, -{ /// Determine the maximum transaction offset considered durable. /// /// The offset is `None` if the log hasn't been flushed to disk yet. 
diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index e72741577bb..fd96c3067c6 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -2081,7 +2081,7 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), Identity::ZERO, 0).map(|repo| { - SnapshotWorker::new(repo, snapshot::Compression::Enabled, Handle::tokio(rt.clone())) + SnapshotWorker::new(repo, snapshot::Compression::Disabled, Handle::tokio(rt.clone())) }) }) .transpose()?; diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index c02e5f6246c..047da5cc308 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -70,20 +70,20 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. 
- pub fn new(snapshot_repo: Arc, compression: Compression, runtime: Handle) -> Self { - let database = snapshot_repo.database_identity(); - let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); + pub fn new(snapshot_repository: Arc, compression: Compression, runtime: Handle) -> Self { + let database = snapshot_repository.database_identity(); + let latest_snapshot = snapshot_repository..latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); let (request_tx, request_rx) = mpsc::unbounded(); let actor = SnapshotWorkerActor { snapshot_requests: request_rx, - snapshot_repo: snapshot_repo.clone(), + snapshot_repository.: snapshot_repo.clone(), snapshot_created: snapshot_created.clone(), metrics: SnapshotMetrics::new(database), runtime: runtime.clone(), compression: compression.is_enabled().then(|| Compressor { - snapshot_repo: snapshot_repo.clone(), + snapshot_repository.: snapshot_repo.clone(), metrics: CompressionMetrics::new(database), stats: <_>::default(), runtime: runtime.clone(), @@ -94,7 +94,7 @@ impl SnapshotWorker { Self { snapshot_created, request_snapshot: request_tx, - snapshot_repository: snapshot_repo, + snapshot_repository, } } @@ -172,7 +172,7 @@ struct SnapshotWorkerActor { snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, - runtime: Handle, + rt: Handle, compression: Option, } @@ -317,7 +317,7 @@ struct Compressor { snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, - runtime: Handle, + rt: Handle, } impl Compressor { @@ -349,8 +349,8 @@ impl Compressor { let range = start..latest_snapshot; let mut stats = self.stats.take().unwrap_or_default(); - let runtime = self.runtime.clone(); - let (mut stats, res) = runtime + let rt = self.rt.clone(); + let (mut stats, res) = rt .spawn_blocking({ let range = range.clone(); move || { diff --git a/crates/core/src/subscription/module_subscription_actor.rs 
b/crates/core/src/subscription/module_subscription_actor.rs index f9c9b13ae04..4c94df74ab8 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2103,7 +2103,7 @@ mod tests { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), snapshots: None, - runtime: spacetimedb_runtime::Runtime::tokio(rt), + runtime: spacetimedb_runtime::Handle::tokio(rt), }), None, 0, From 5af7fd91497b70b2a93ded78b966054dc7dc6e70 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 18:30:26 +0530 Subject: [PATCH 57/74] Update crates/commitlog/src/lib.rs Signed-off-by: Shubham Mishra --- crates/commitlog/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index d2f6f20cdd5..abc8729c978 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -212,7 +212,6 @@ where inner: RwLock::new(inner), }) } -} /// Determine the maximum transaction offset considered durable. /// From 52783ce598247f25122ac3117b816a784e0a6a1c Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 18:50:59 +0530 Subject: [PATCH 58/74] compile fix --- crates/core/src/db/snapshot.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 047da5cc308..178bbda3d72 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -70,26 +70,26 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. 
- pub fn new(snapshot_repository: Arc, compression: Compression, runtime: Handle) -> Self { + pub fn new(snapshot_repository: Arc, compression: Compression, rt: Handle) -> Self { let database = snapshot_repository.database_identity(); - let latest_snapshot = snapshot_repository..latest_snapshot().ok().flatten().unwrap_or(0); + let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); let (request_tx, request_rx) = mpsc::unbounded(); let actor = SnapshotWorkerActor { snapshot_requests: request_rx, - snapshot_repository.: snapshot_repo.clone(), + snapshot_repo: snapshot_repository.clone(), snapshot_created: snapshot_created.clone(), metrics: SnapshotMetrics::new(database), - runtime: runtime.clone(), + rt: rt.clone(), compression: compression.is_enabled().then(|| Compressor { - snapshot_repository.: snapshot_repo.clone(), + snapshot_repo: snapshot_repository.clone(), metrics: CompressionMetrics::new(database), stats: <_>::default(), - runtime: runtime.clone(), + rt: rt.clone(), }), }; - runtime.spawn(actor.run()); + rt.spawn(actor.run()); Self { snapshot_created, @@ -172,7 +172,7 @@ struct SnapshotWorkerActor { snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, - rt: Handle, + rt: Handle, compression: Option, } @@ -224,7 +224,7 @@ impl SnapshotWorkerActor { let inner_timer = self.metrics.snapshot_timing_inner.clone(); let snapshot_repo = self.snapshot_repo.clone(); - let runtime = self.runtime.clone(); + let runtime = self.rt.clone(); let database_identity = self.snapshot_repo.database_identity(); @@ -317,7 +317,7 @@ struct Compressor { snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, - rt: Handle, + rt: Handle, } impl Compressor { From 30012dbfe71047140fc4bd863d4495f6ba528346 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 19:23:54 +0530 Subject: [PATCH 59/74] lint --- Cargo.lock | 1 + 
crates/core/src/db/relational_db.rs | 2 -- crates/snapshot/Cargo.toml | 1 + crates/snapshot/tests/remote.rs | 4 ++-- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea1503a9863..e117cf6d300 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8668,6 +8668,7 @@ dependencies = [ "spacetimedb-lib 2.2.0", "spacetimedb-paths", "spacetimedb-primitives 2.2.0", + "spacetimedb-runtime", "spacetimedb-sats 2.2.0", "spacetimedb-schema", "spacetimedb-table", diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index fd96c3067c6..57230e8866b 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -40,8 +40,6 @@ use spacetimedb_lib::db::raw_def::v9::{btree, RawModuleDefV9Builder, RawSql}; use spacetimedb_lib::st_var::StVarValue; use spacetimedb_lib::ConnectionId; use spacetimedb_lib::Identity; -#[cfg(test)] -use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; use spacetimedb_primitives::*; use spacetimedb_runtime::Handle; diff --git a/crates/snapshot/Cargo.toml b/crates/snapshot/Cargo.toml index f9f767ce18e..aa51c4e3bd8 100644 --- a/crates/snapshot/Cargo.toml +++ b/crates/snapshot/Cargo.toml @@ -35,6 +35,7 @@ spacetimedb-core = { path = "../core", features = ["test"] } spacetimedb-schema = { path = "../schema" } spacetimedb-datastore = { path = "../datastore", features = ["test"] } spacetimedb-durability = { workspace = true, features = ["test"] } +spacetimedb-runtime = { workspace = true } anyhow.workspace = true env_logger.workspace = true diff --git a/crates/snapshot/tests/remote.rs b/crates/snapshot/tests/remote.rs index 81d67bc2ec5..1c6c51fe8e7 100644 --- a/crates/snapshot/tests/remote.rs +++ b/crates/snapshot/tests/remote.rs @@ -10,7 +10,7 @@ use spacetimedb::{ snapshot::{self, SnapshotWorker}, }, error::DBError, - runtime, Identity, + Identity, }; use spacetimedb_datastore::execution_context::Workload; use 
spacetimedb_datastore::locking_tx_datastore::datastore::Locking; @@ -227,7 +227,7 @@ impl SourceSnapshot { async fn create_snapshot(repo: Arc) -> anyhow::Result { let start = Instant::now(); - let rt = runtime::Runtime::tokio_current(); + let rt = spacetimedb_runtime::Handle::tokio_current(); // NOTE: `_db` needs to stay alive until the snapshot is taken, // because the snapshot worker holds only a weak reference. let (mut watch, _db) = spawn_blocking(|| { From d9f009b32b46afff87bb1cf0e25c2863a790135e Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 19:26:04 +0530 Subject: [PATCH 60/74] fix Cargo.toml --- crates/standalone/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/standalone/Cargo.toml b/crates/standalone/Cargo.toml index 3bc7335625a..180b3a60b4c 100644 --- a/crates/standalone/Cargo.toml +++ b/crates/standalone/Cargo.toml @@ -54,7 +54,7 @@ serde_json.workspace = true sled.workspace = true socket2.workspace = true thiserror.workspace = true -tokio = { workspace = true, features = ["full"] } +tokio.workspace = true tower-http.workspace = true toml.workspace = true tracing = { workspace = true, features = ["release_max_level_debug"] } From 3b767256b72dc2aad1a5a079f82ecb39784014e0 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 19:54:56 +0530 Subject: [PATCH 61/74] endlines on README --- crates/runtime/README.md | 85 ++++++++++++---------------------------- 1 file changed, 24 insertions(+), 61 deletions(-) diff --git a/crates/runtime/README.md b/crates/runtime/README.md index a7b2fdefa71..11ae570caa4 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -1,36 +1,21 @@ # spacetimedb-runtime -`spacetimedb-runtime` is runtime boundary that lets SpacetimeDB core code run -under deterministic simulation testing (DST). +`spacetimedb-runtime` is a runtime boundary that lets SpacetimeDB core code run under deterministic simulation testing (DST). 
-DST runs code inside a deterministic simulator that controls nondeterministic -inputs instead of letting them come directly from the OS and real runtime -environment. Given same -seed, simulator should produce same trace. When it finds a bug, seed should be -enough to reproduce that bug exactly. +DST runs code inside a deterministic simulator that controls nondeterministic inputs instead of letting them come directly from the OS and real runtime environment. Given the same seed, the simulator should produce the same trace. When it finds a bug, the seed should be enough to reproduce that bug exactly. -For this to work, code under test must not read clocks, randomness, -scheduling, I/O, or network behavior directly from outer environment. Those -effects need interfaces that production can implement with real runtime-backed services and -DST can replace with simulated ones. +For this to work, code under test must not read clocks, randomness, scheduling, I/O, or network behavior directly from the outer environment. Those effects need interfaces that production can implement with real runtime-backed services and DST can replace with simulated ones. -This crate provides the execution-control part of that boundary: spawning, -timeouts, virtual time, deterministic randomness, task scheduling, and fault -decisions. Storage, networking, and replication should be modeled through -higher-level abstractions. +This crate provides the execution-control part of that boundary: spawning, timeouts, virtual time, deterministic randomness, task scheduling, and fault decisions. Storage, networking, and replication should be modeled through higher-level abstractions. ## Architecture -[src/lib.rs](./src/lib.rs) exposes `Runtime`, small runtime handle shared code -carries. It has two variants: +[src/lib.rs](./src/lib.rs) exposes `Runtime`, a small runtime handle shared code carries. It has two variants: - `Runtime::Tokio(TokioHandle)` for real runtime execution. 
- `Runtime::Simulation(sim::Handle)` for deterministic simulation. -[src/sim](./src/sim) contains simulation core. It is single-threaded and aims -toward `no_std + alloc` over time. It is written with dependency reduction in -mind, though it still uses a small set of runtime-support crates today. -The module includes: +[src/sim](./src/sim) contains the simulation core. It is single-threaded and aims toward `no_std + alloc` over time. It is written with dependency reduction in mind, though it still uses a small set of runtime-support crates today. The module includes: - `executor`: single-threaded task scheduler with deterministic runnable selection. - `time`: virtual clock, sleeps, and timeouts. @@ -41,51 +26,29 @@ The module includes: [src/sim_std.rs](./src/sim_std.rs) contains `std`/OS glue around the simulator: - `block_on` installs simulation guards for tests running in a normal process. -- `check_determinism` replays same seeded workload twice and compares trace. +- `check_determinism` replays the same seeded workload twice and compares traces. - libc randomness hooks warn and delegate if code reaches OS entropy. -- Unix thread hooks reject accidental `std::thread::spawn` while simulation is - active. - -Tokio integration is intentionally small and lives directly in -[src/lib.rs](./src/lib.rs). +- Unix thread hooks reject accidental `std::thread::spawn` while simulation is active. +Tokio integration is intentionally small and lives directly in [src/lib.rs](./src/lib.rs). Feature flags: -- `tokio`: enables tokio runtime backend and remains in default feature set. -- `simulation`: enables deterministic simulation runtime and `sim_std` - helpers. +- `tokio`: enables the Tokio runtime backend and remains in the default feature set. +- `simulation`: enables the deterministic simulation runtime and `sim_std` helpers. 
## Scope and Limitations -- **Single-threaded runtime.** The simulator exposes interleaving and timeout - bugs, but not bugs that require true parallel execution. The direction is to - keep deep-core code single-threaded or close to thread-per-core; simulating - real parallelism is out of scope. - -- **One shared virtual clock.** Nodes share one clock, so thir clock can not - drift to show mismatched time. - -- **No built-in network, storage, or I/O simulation.** This crate provides - deterministic execution primitives only. Higher-level harnesses should model - message delivery, disk behavior, and failures. - -- **Not a Tokio replacement.** This crate does not aim to simulate APIs like - `tokio::net` or `tokio::fs`. Code that depends on them needs a higher-level - abstraction boundary. - -- **`spawn_blocking` is only a facade on simulation runtime.** On the simulation - backend it currently delegates to a normal spawned task, so the closure - still runs on the single executor thread and can block runtime progress. The - direction is to avoid relying on blocking-pool semantics. - -- **OS randomness is not controlled.** `sim_std` warns if code reaches OS - entropy. The direction is to keep application code and testing harnesses off - OS randomness entirely. - -- **Not fully `no_std` or allocation-controlled yet.** The simulation core is - written with a `no_std + alloc` direction in mind, so moving its core - further in that direction should be straightforward. Today, though, `std`/OS - glue still depends on `std`, and the runtime still allocates through normal - Rust container and task paths. Tight control over heap allocation is a - direction, not something this crate enforces yet. +- **Single-threaded runtime.** The simulator exposes interleaving and timeout bugs, but not bugs that require true parallel execution. The direction is to keep deep-core code single-threaded or close to thread-per-core; simulating real parallelism is out of scope. 
+ +- **One shared virtual clock.** Nodes share one clock, so their clocks cannot drift to show mismatched time. + +- **No built-in network, storage, or I/O simulation.** This crate provides deterministic execution primitives only. Higher-level harnesses should model message delivery, disk behavior, and failures. + +- **Not a Tokio replacement.** This crate does not aim to simulate APIs like `tokio::net` or `tokio::fs`. Code that depends on them needs a higher-level abstraction boundary. + +- **`spawn_blocking` is only a facade on the simulation runtime.** On the simulation backend, it currently delegates to a normal spawned task, so the closure still runs on the single executor thread and can block runtime progress. The direction is to avoid relying on blocking-pool semantics. + +- **OS randomness is not controlled.** `sim_std` warns if code reaches OS entropy. The direction is to keep application code and testing harnesses off OS randomness entirely. + +- **Not fully `no_std` or allocation-controlled yet.** The simulation core is written with a `no_std + alloc` direction in mind, so moving its core further in that direction should be straightforward. Today, though, `std`/OS glue still depends on `std`, and the runtime still allocates through normal Rust container and task paths. Tight control over heap allocation is a direction, not something this crate enforces yet. 
From 9996a16170696f55d1323db7f130f025fa0cfa74 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 20:18:58 +0530 Subject: [PATCH 62/74] comments --- crates/runtime/src/lib.rs | 6 ------ crates/runtime/src/sim/executor.rs | 2 -- crates/runtime/src/sim/mod.rs | 6 ------ crates/runtime/src/sim/time/mod.rs | 2 -- 4 files changed, 16 deletions(-) diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 65e35c8793f..f93889e7490 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,9 +1,3 @@ -#![cfg_attr(not(any(feature = "tokio", feature = "simulation")), no_std)] - -//! Runtime and deterministic simulation utilities shared by core and DST. - -extern crate alloc; - use core::{ fmt, future::Future, diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index 0c5dd0754ae..43fd9f6d922 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -1,5 +1,3 @@ -//! Minimal asynchronous executor adapted from madsim's `sim/task` loop. - use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; use core::{ fmt, diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs index e44e2ac6707..ccdcc104991 100644 --- a/crates/runtime/src/sim/mod.rs +++ b/crates/runtime/src/sim/mod.rs @@ -1,9 +1,3 @@ -//! Local deterministic simulation runtime. -//! -//! This module is deliberately small, but its executor shape follows madsim's: -//! futures are scheduled as runnable tasks and the ready queue is sampled by a -//! deterministic RNG instead of being driven by a package-level async runtime. - pub mod buggify; mod executor; mod rng; diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index 65a1663429b..70a6a037187 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -1,5 +1,3 @@ -//! Virtual time for the local simulation runtime. 
- mod sleep; use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; From d5992a2eba38ab5e0e372942824376b3dc17dace Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 13:01:07 +0530 Subject: [PATCH 63/74] add extern alloc --- crates/runtime/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index f93889e7490..d2850e87c8f 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,3 +1,4 @@ +extern crate alloc; use core::{ fmt, future::Future, From 6079ef00ae1d96bd9934db4ced9010029922bcfa Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 16:45:55 +0530 Subject: [PATCH 64/74] remove futures dependency --- crates/runtime/Cargo.toml | 7 ++++--- crates/runtime/README.md | 29 ++++++++++++++++++----------- crates/runtime/src/sim/executor.rs | 9 ++++++--- crates/runtime/src/sim/time/mod.rs | 28 ++++++++++++++++------------ crates/runtime/tests/sim_e2e.rs | 16 ++++++++++++++++ 5 files changed, 60 insertions(+), 29 deletions(-) diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index 0460432086b..4cd0af60869 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -10,13 +10,14 @@ rust-version.workspace = true workspace = true [dependencies] -futures.workspace = true -futures-util.workspace = true tokio = { workspace = true, optional = true } -async-task = { version = "4.4", optional = true } +async-task = { version = "4.4", default-features = false, optional = true } spin = { version = "0.9", default-features = false, features = ["mutex", "spin_mutex"], optional = true } libc = { version = "0.2", optional = true } +[dev-dependencies] +futures.workspace = true + [features] default = ["tokio"] tokio = ["dep:tokio"] diff --git a/crates/runtime/README.md b/crates/runtime/README.md index 11ae570caa4..970a17889c4 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -10,17 +10,17 @@ This crate provides the 
execution-control part of that boundary: spawning, timeo ## Architecture -[src/lib.rs](./src/lib.rs) exposes `Runtime`, a small runtime handle shared code carries. It has two variants: +[src/lib.rs](./src/lib.rs) exposes `Handle`, a small runtime handle shared code carries. It has two variants: -- `Runtime::Tokio(TokioHandle)` for real runtime execution. -- `Runtime::Simulation(sim::Handle)` for deterministic simulation. +- `Handle::Tokio(TokioHandle)` for real runtime execution. +- `Handle::Simulation(sim::Handle)` for deterministic simulation. -[src/sim](./src/sim) contains the simulation core. It is single-threaded and aims toward `no_std + alloc` over time. It is written with dependency reduction in mind, though it still uses a small set of runtime-support crates today. The module includes: +[src/sim](./src/sim) contains the simulation core. It is single-threaded and targets `no_std + alloc`. The module includes: - `executor`: single-threaded task scheduler with deterministic runnable selection. - `time`: virtual clock, sleeps, and timeouts. - `rng`: seeded deterministic randomness for scheduler and workload decisions. -- `buggify`: seeded fault-injection decisions. +- `buggify`: fault-injection surface. Calls rng to decide probabilistically whether to inject failures into simulated operations. - `node`: node builders and node-local scheduling handles. [src/sim_std.rs](./src/sim_std.rs) contains `std`/OS glue around the simulator: @@ -37,18 +37,25 @@ Feature flags: - `tokio`: enables the Tokio runtime backend and remains in the default feature set. - `simulation`: enables the deterministic simulation runtime and `sim_std` helpers. -## Scope and Limitations +## Related documents -- **Single-threaded runtime.** The simulator exposes interleaving and timeout bugs, but not bugs that require true parallel execution. The direction is to keep deep-core code single-threaded or close to thread-per-core; simulating real parallelism is out of scope. 
+- **[DETERMINISM_COVERAGE.md](./DETERMINISM_COVERAGE.md)** — tracks nondeterminism surfaces. + +## Design Principles -- **One shared virtual clock.** Nodes share one clock, so their clocks cannot drift to show mismatched time. +- **Single-threaded runtime.** The simulator exposes interleaving and timeout bugs, but not bugs that require true parallel execution. The direction is to keep deep-core code single-threaded or close to thread-per-core; simulating real parallelism is out of scope. - **No built-in network, storage, or I/O simulation.** This crate provides deterministic execution primitives only. Higher-level harnesses should model message delivery, disk behavior, and failures. - **Not a Tokio replacement.** This crate does not aim to simulate APIs like `tokio::net` or `tokio::fs`. Code that depends on them needs a higher-level abstraction boundary. -- **`spawn_blocking` is only a facade on the simulation runtime.** On the simulation backend, it currently delegates to a normal spawned task, so the closure still runs on the single executor thread and can block runtime progress. The direction is to avoid relying on blocking-pool semantics. +- **Zero dependency.** The simulation core in `sim/` is already `no_std + alloc`. The `sim_std` module is a thin OS-facing wrapper — the std dependency lives there, not in the simulation core itself. It stays until the application logic above this crate also moves to `no_std`. -- **OS randomness is not controlled.** `sim_std` warns if code reaches OS entropy. The direction is to keep application code and testing harnesses off OS randomness entirely. +## Current Limitations -- **Not fully `no_std` or allocation-controlled yet.** The simulation core is written with a `no_std + alloc` direction in mind, so moving its core further in that direction should be straightforward. Today, though, `std`/OS glue still depends on `std`, and the runtime still allocates through normal Rust container and task paths. 
Tight control over heap allocation is a direction, not something this crate enforces yet. + +- **One shared virtual clock.** All simulated nodes share a single clock. This masks bugs related to timing mismatch across machines. + +- **`spawn_blocking` is only a facade on the simulation backend.** It delegates to a normal spawned task, so the closure still runs on the single executor thread and can block runtime progress. The direction is to avoid relying on blocking-pool semantics. + +- **OS randomness is not controlled.** `sim_std` warns if code reaches OS entropy. The direction is to keep application code and testing harnesses off OS randomness entirely. diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index 43fd9f6d922..c064bca804f 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -8,7 +8,6 @@ use core::{ time::Duration, }; -use futures_util::FutureExt; use spin::Mutex; use crate::sim::{time::TimeHandle, Rng}; @@ -578,7 +577,7 @@ impl Executor { /// nor timers remain, the simulation is considered deadlocked. 
fn block_on(&self, future: F) -> F::Output { let sender = self.sender.clone(); - let (runnable, task) = unsafe { + let (runnable, mut task) = unsafe { async_task::Builder::new() .metadata(NodeId::MAIN) .spawn_unchecked(move |_| future, move |runnable| sender.send(runnable)) @@ -588,7 +587,11 @@ impl Executor { loop { self.run_all_ready(); if task.is_finished() { - return task.now_or_never().expect("finished task should resolve"); + let waker = Waker::noop(); + return match Pin::new(&mut task).poll(&mut Context::from_waker(&waker)) { + Poll::Ready(output) => output, + Poll::Pending => unreachable!("task.is_finished() was true"), + }; } if self.time.wake_next_timer() { diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index 70a6a037187..de0a201d9f2 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -1,9 +1,7 @@ mod sleep; use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; -use core::{fmt, future::Future, task::Waker, time::Duration}; - -use futures_util::{select_biased, FutureExt}; +use core::{fmt, future::Future, pin::pin, task::{Poll, Waker}, time::Duration}; use sleep::wake_all; use spin::Mutex; @@ -117,18 +115,24 @@ impl TimeHandle { /// Race a future against a virtual-time sleep. /// - /// This is implemented as `future` versus `sleep(duration)` using a biased - /// select. If both become ready in the same simulated step, the main - /// future wins the tie so completion beats timeout deterministically. + /// Uses a biased `poll_fn` that polls `future` before `sleep`. If both are + /// ready in the same step, the main future wins — completion beats timeout + /// deterministically. pub async fn timeout(&self, duration: Duration, future: impl Future) -> Result { let sleep = self.sleep(duration); - futures::pin_mut!(future); - futures::pin_mut!(sleep); + let mut future = pin!(future); + let mut sleep = pin!(sleep); - select_biased! 
{ - output = future.fuse() => Ok(output), - () = sleep.fuse() => Err(TimeoutElapsed { duration }), - } + core::future::poll_fn(|cx| { + if let Poll::Ready(output) = future.as_mut().poll(cx) { + return Poll::Ready(Ok(output)); + } + if let Poll::Ready(()) = sleep.as_mut().poll(cx) { + return Poll::Ready(Err(TimeoutElapsed { duration })); + } + Poll::Pending + }) + .await } } diff --git a/crates/runtime/tests/sim_e2e.rs b/crates/runtime/tests/sim_e2e.rs index 3757ee187fa..1f505696801 100644 --- a/crates/runtime/tests/sim_e2e.rs +++ b/crates/runtime/tests/sim_e2e.rs @@ -148,19 +148,23 @@ const CLIENT_REQUESTS: [(u64, u64); 5] = [(0, 4), (1, 5), (2, 7), (3, 9), (4, 11 /// Each worker sleeps for deterministic virtual latency and may drop the reply /// based on buggify. fn run_buggified_client_server(seed: u64) -> ClientServerRun { + // --- setup: runtime, buggify, two nodes, and communication channels --- let mut runtime = Runtime::new(seed); buggify::enable(&runtime); let handle = runtime.handle(); let client_node = runtime.create_node().name("client").build(); let server_node = runtime.create_node().name("server").build(); + // mpsc channel: client tasks send Request messages to the server task let (request_tx, mut request_rx) = mpsc::unbounded::(); let server_events = Arc::new(Mutex::new(Vec::new())); let (responses, server_events) = runtime.block_on(async move { + // --- server: receive 5 requests, spawn one worker per request --- let server_handle = handle.clone(); let server_events_for_server = Arc::clone(&server_events); let server = server_node.clone().spawn(async move { let mut workers = Vec::new(); + // Receive all 5 requests before processing any replies for _ in 0..5 { let request = request_rx.next().await.expect("client should send request"); server_events_for_server.lock().push(ServerEvent::Received { @@ -168,10 +172,13 @@ fn run_buggified_client_server(seed: u64) -> ClientServerRun { at: server_handle.now(), }); + // --- server worker: simulate latency, 
then drop or reply based on buggify --- let worker_handle = server_handle.clone(); let worker_events = Arc::clone(&server_events_for_server); workers.push(server_node.clone().spawn(async move { + // Deterministic virtual latency: each request id has a distinct sleep worker_handle.sleep(Duration::from_millis(request.id + 1)).await; + // buggify decides whether to drop this request (40% probability) if worker_handle.buggify_with_prob(0.4) { worker_events.lock().push(ServerEvent::Dropped { id: request.id, @@ -180,6 +187,7 @@ fn run_buggified_client_server(seed: u64) -> ClientServerRun { return; } + // No fault injected: send the reply let response = Response { id: request.id, value: request.input * 10, @@ -196,14 +204,17 @@ fn run_buggified_client_server(seed: u64) -> ClientServerRun { })); } + // Wait for all server workers to complete for worker in workers { worker.await.expect("server worker should complete"); } }); + // --- client: spawn one task per request, send them to server, collect responses --- let client_outer_node = client_node.clone(); let client = client_node.spawn(async move { let mut requests = Vec::new(); + // Spawn a task for each request so they submit concurrently for (id, input) in CLIENT_REQUESTS { let request_tx = request_tx.clone(); let client_request_node = client_outer_node.clone(); @@ -212,11 +223,14 @@ fn run_buggified_client_server(seed: u64) -> ClientServerRun { request_tx .unbounded_send(Request { id, input, respond_to }) .expect("server inbox should be open"); + // Await the server's reply (None if the server dropped this request) (id, response_rx.await.ok()) })); } + // All requests sent, close the channel so the server loop terminates drop(request_tx); + // Collect responses in spawn order let mut responses = Vec::new(); for request in requests { responses.push(request.await.expect("client request task should complete")); @@ -224,11 +238,13 @@ fn run_buggified_client_server(seed: u64) -> ClientServerRun { responses }); + // Drive 
both client and server to completion let responses = client.await.expect("client task should complete"); server.await.expect("server task should complete"); (responses, server_events.lock().clone()) }); + // --- package the results: client responses, server trace, and total virtual time --- ClientServerRun { responses, server_events, From 8601d78ad44c4887b389e7a8c8fd3dd0bec02e0e Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 01:14:54 +0530 Subject: [PATCH 65/74] coverage matrix --- crates/runtime/DETERMINISM_COVERAGE.md | 58 ++++++++++++++++++++++++++ crates/runtime/README.md | 2 + crates/runtime/src/sim_std.rs | 1 + 3 files changed, 61 insertions(+) create mode 100644 crates/runtime/DETERMINISM_COVERAGE.md diff --git a/crates/runtime/DETERMINISM_COVERAGE.md b/crates/runtime/DETERMINISM_COVERAGE.md new file mode 100644 index 00000000000..4df6c2a6df9 --- /dev/null +++ b/crates/runtime/DETERMINISM_COVERAGE.md @@ -0,0 +1,58 @@ +# Determinism Coverage + +This document tracks which sources of nondeterminism are under control in `spacetimedb-runtime`, which ones are only constrained by current architecture, and which ones still escape the simulator boundary. + +It is meant to serve two purposes: + +1. Make the current determinism boundary explicit for runtime code, core crates, and DST harnesses. +2. Provide a place to record and review assumptions when a PR changes that boundary. + +## Status Definitions + +- `Controlled` + The simulator or runtime owns this source of nondeterminism directly. Given the same seed and the same simulated inputs, behavior should replay the same way. + +- `Constrained` + This surface is not fully simulator-controlled, but the current architecture limits how it is used. Replay should remain stable if those constraints continue to hold. + +- `Audited` + This surface is not mechanically controlled.
Current usage has been reviewed and is believed not to affect replay, but that guarantee depends on call patterns and can regress. + +- `Known Leak` + This source can currently escape simulator control and affect replay. It should be treated as explicit technical debt or a documented exception. + +- `Out of Scope` + This crate does not try to control this surface. If it matters for DST, it must be modeled by a higher-level abstraction or test harness. + +## Control Matrix + +| Surface | Status | Boundary | Current control or assumption | Failure mode if violated | Required direction | +| --- | --- | --- | --- | --- | --- | +| Executor scheduling | Controlled | `runtime::sim::executor` | Runnable selection is driven by seeded simulator RNG | Replay diverges across runs | Keep simulated task scheduling inside the sim executor | +| Simulated task lifecycle | Controlled | `runtime::sim::{executor, JoinHandle}` | Spawn, wake, cancel, and join all happen inside simulator-owned scheduling | Cancellation and join behavior diverge across runs | Keep lifecycle transitions on simulator-owned tasks only | +| Virtual time and timers | Controlled | `runtime::sim::time` | Simulated time advances only through explicit advance or next-timer jump | Timeouts and ordering become host-timing dependent | Keep timer progression fully simulator-owned | +| Runtime RNG and buggify | Controlled | `runtime::sim::rng` | Runtime RNG drives scheduler and probabilistic fault-injection decisions | RNG and fault decisions are not replayable | Keep simulator-owned randomness explicit and seed-driven | +| OS thread creation during simulation | Controlled | `runtime::sim_std` | Unix thread hook rejects `std::thread::spawn` while simulation is active | Host scheduler escapes simulator control | Keep simulated work on simulator tasks, not OS threads | +| OS entropy | Known Leak | `runtime::sim_std` | Randomness requests warn and then delegate to the OS | Same seed can produce different traces | Add 
backtrace to warnings, remove call sites, eventually fail closed or fully model the source | +| `HashMap` randomized iteration | Audited | Runtime and caller code | Runtime does not force deterministic hash seeding; correctness must not depend on iteration order | Hidden ordering dependencies cause flaky replay | Prefer ordered maps or explicit sorting where observable order matters | +| `tokio::sync` primitives | Constrained | Core crates above runtime | These can be replay-compatible only when all participating tasks remain simulator-owned and progress stays on simulator-controlled async paths | Wake ordering or blocking semantics diverge once code depends on a real runtime or host-driven progress | Audit per primitive and push deep-core paths toward runtime-owned or single-threaded structures | +| `parking_lot::{}` and `std::sync::{}` | Constrained | Core crates, especially datastore | Safe only where access stays single-threaded or non-contended under DST | Host synchronization leaks nondeterministic acquisition order | Keep out of deep-core execution paths; prefer runtime-owned or single-threaded structures | +| File and network I/O | Out of Scope | Runtime crate | Runtime does not simulate filesystem or network behavior | Real I/O timing, ordering, and errors are not replayable | Model via domain-specific DST abstractions | +| Tokio runtime ownership | Constrained | `spacetimedb_runtime::Handle` / shared core APIs | Shared code uses a narrow runtime boundary instead of concrete Tokio subsystems | Concrete Tokio APIs leak into DST-facing core paths | Keep shared code on runtime or domain abstractions, not raw Tokio services | +| Heap allocation and OOM | Known Leak | Broad, especially deep-core direction | Allocation happens through normal Rust paths; deterministic allocation failure is not modeled | Resource-exhaustion behavior is not reproducible | Move the simulation core and eventually deep-core paths toward `no_std + alloc` with explicit allocation 
boundaries | +| Snapshot / commitlog / datastore host effects | Out of Scope | Higher-level durability and storage layers | Runtime only provides scheduling, time, and fault-decision primitives | Storage semantics depend on real host behavior unless wrapped | Model durable behavior through domain-specific DST abstractions | + +## Scope Notes + +This document covers the runtime crate and the determinism boundary it exposes to core crates and DST harnesses. + +`Controlled` is the target state for nondeterminism surfaces that must participate directly in deterministic simulation testing. `Constrained` and `Audited` are temporary states: they may be acceptable for a period, but they are not strong guarantees. `Known Leak` marks places where replay can still depend on host behavior. `Out of Scope` does not mean unimportant; it means control must live in another layer. + +## Update Rule + +A PR should update this document if it: + +- introduces a new source of nondeterminism, +- changes the control status of an existing surface, +- adds a new assumption about single-threading, iteration order, runtime ownership, or host behavior, or +- removes a leak or upgrades a surface from `Audited` or `Constrained` to `Controlled`. diff --git a/crates/runtime/README.md b/crates/runtime/README.md index 970a17889c4..4fece270157 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -8,6 +8,8 @@ For this to work, code under test must not read clocks, randomness, scheduling, This crate provides the execution-control part of that boundary: spawning, timeouts, virtual time, deterministic randomness, task scheduling, and fault decisions. Storage, networking, and replication should be modeled through higher-level abstractions. +For a tracked view of what is currently under simulator control, what is only constrained by convention, and what still leaks host behavior, see [DETERMINISM_COVERAGE.md](./DETERMINISM_COVERAGE.md). 
+ ## Architecture [src/lib.rs](./src/lib.rs) exposes `Handle`, a small runtime handle shared code carries. It has two variants: diff --git a/crates/runtime/src/sim_std.rs b/crates/runtime/src/sim_std.rs index cdb425a2086..cbdcd299b49 100644 --- a/crates/runtime/src/sim_std.rs +++ b/crates/runtime/src/sim_std.rs @@ -140,6 +140,7 @@ unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc: #[inline(never)] unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { eprintln!("warning: randomness requested; delegating to host OS"); + eprintln!("{}", std::backtrace::Backtrace::force_capture()); unsafe { real_getrandom()(buf, buflen, flags) } } From b27b02132deec7042f4edafcb98115ff6fa751b3 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 16:52:44 +0530 Subject: [PATCH 66/74] update determinism coverage --- Cargo.lock | 1 - crates/runtime/DETERMINISM_COVERAGE.md | 16 ++++------------ 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e117cf6d300..812c63f88ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8482,7 +8482,6 @@ version = "2.2.0" dependencies = [ "async-task", "futures", - "futures-util", "libc", "spin", "tokio", diff --git a/crates/runtime/DETERMINISM_COVERAGE.md b/crates/runtime/DETERMINISM_COVERAGE.md index 4df6c2a6df9..076efb865e4 100644 --- a/crates/runtime/DETERMINISM_COVERAGE.md +++ b/crates/runtime/DETERMINISM_COVERAGE.md @@ -28,26 +28,18 @@ It is meant to serve two purposes: | Surface | Status | Boundary | Current control or assumption | Failure mode if violated | Required direction | | --- | --- | --- | --- | --- | --- | -| Executor scheduling | Controlled | `runtime::sim::executor` | Runnable selection is driven by seeded simulator RNG | Replay diverges across runs | Keep simulated task scheduling inside the sim executor | -| Simulated task lifecycle | Controlled | `runtime::sim::{executor, JoinHandle}` | Spawn, wake, cancel, and join all happen 
inside simulator-owned scheduling | Cancellation and join behavior diverge across runs | Keep lifecycle transitions on simulator-owned tasks only | -| Virtual time and timers | Controlled | `runtime::sim::time` | Simulated time advances only through explicit advance or next-timer jump | Timeouts and ordering become host-timing dependent | Keep timer progression fully simulator-owned | -| Runtime RNG and buggify | Controlled | `runtime::sim::rng` | Runtime RNG drives scheduler and probabilistic fault-injection decisions | RNG and fault decisions are not replayable | Keep simulator-owned randomness explicit and seed-driven | -| OS thread creation during simulation | Controlled | `runtime::sim_std` | Unix thread hook rejects `std::thread::spawn` while simulation is active | Host scheduler escapes simulator control | Keep simulated work on simulator tasks, not OS threads | +| Executor scheduling | Controlled | `runtime::sim::executor` | Runnable selection is driven by seeded simulator RNG | Replay diverges across runs | - | +| Virtual time and timers | Controlled | `runtime::sim::time` | Simulated time advances only through explicit advance or next-timer jump | Timeouts and ordering become host-timing dependent | - | +| Runtime RNG and buggify | Controlled | `runtime::sim::rng` | Runtime RNG drives scheduler and probabilistic fault-injection decisions | RNG and fault decisions are not replayable | - | +| OS thread creation during simulation | Controlled | `runtime::sim_std` | Unix thread hook rejects `std::thread::spawn` while simulation is active | Host scheduler escapes simulator control | - | | OS entropy | Known Leak | `runtime::sim_std` | Randomness requests warn and then delegate to the OS | Same seed can produce different traces | Add backtrace to warnings, remove call sites, eventually fail closed or fully model the source | | `HashMap` randomized iteration | Audited | Runtime and caller code | Runtime does not force deterministic hash seeding; correctness must 
not depend on iteration order | Hidden ordering dependencies cause flaky replay | Prefer ordered maps or explicit sorting where observable order matters | | `tokio::sync` primitives | Constrained | Core crates above runtime | These can be replay-compatible only when all participating tasks remain simulator-owned and progress stays on simulator-controlled async paths | Wake ordering or blocking semantics diverge once code depends on a real runtime or host-driven progress | Audit per primitive and push deep-core paths toward runtime-owned or single-threaded structures | | `parking_lot::{}` and `std::sync::{}` | Constrained | Core crates, especially datastore | Safe only where access stays single-threaded or non-contended under DST | Host synchronization leaks nondeterministic acquisition order | Keep out of deep-core execution paths; prefer runtime-owned or single-threaded structures | | File and network I/O | Out of Scope | Runtime crate | Runtime does not simulate filesystem or network behavior | Real I/O timing, ordering, and errors are not replayable | Model via domain-specific DST abstractions | -| Tokio runtime ownership | Constrained | `spacetimedb_runtime::Handle` / shared core APIs | Shared code uses a narrow runtime boundary instead of concrete Tokio subsystems | Concrete Tokio APIs leak into DST-facing core paths | Keep shared code on runtime or domain abstractions, not raw Tokio services | | Heap allocation and OOM | Known Leak | Broad, especially deep-core direction | Allocation happens through normal Rust paths; deterministic allocation failure is not modeled | Resource-exhaustion behavior is not reproducible | Move the simulation core and eventually deep-core paths toward `no_std + alloc` with explicit allocation boundaries | | Snapshot / commitlog / datastore host effects | Out of Scope | Higher-level durability and storage layers | Runtime only provides scheduling, time, and fault-decision primitives | Storage semantics depend on real host behavior 
unless wrapped | Model durable behavior through domain-specific DST abstractions | -## Scope Notes - -This document covers the runtime crate and the determinism boundary it exposes to core crates and DST harnesses. - -`Controlled` is the target state for nondeterminism surfaces that must participate directly in deterministic simulation testing. `Constrained` and `Audited` are temporary states: they may be acceptable for a period, but they are not strong guarantees. `Known Leak` marks places where replay can still depend on host behavior. `Out of Scope` does not mean unimportant; it means control must live in another layer. - ## Update Rule A PR should update this document if it: From 4f6ca2334c7db2137ac7b999e83b15881fa08f02 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 17:05:59 +0530 Subject: [PATCH 67/74] put extern alloc behing gate --- crates/runtime/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index d2850e87c8f..aef70fcaf1a 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,4 +1,6 @@ +#[cfg(feature = "simulation")] extern crate alloc; + use core::{ fmt, future::Future, From 651f8c6745cd99a86746fdfefd25c03587d1585d Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 17:49:14 +0530 Subject: [PATCH 68/74] join handle cleanup --- crates/runtime/src/lib.rs | 130 ++++++++++++++++++++++------------ crates/runtime/src/sim_std.rs | 6 +- 2 files changed, 89 insertions(+), 47 deletions(-) diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index aef70fcaf1a..dfc96a3c870 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -36,9 +36,9 @@ pub struct AbortHandle { enum JoinHandleInner { #[cfg(feature = "tokio")] - Tokio(Option>), + Tokio(tokio::task::JoinHandle), #[cfg(feature = "simulation")] - Simulation(Option>), + Simulation(sim::JoinHandle), Detached(PhantomData), } @@ -104,41 +104,21 @@ impl 
JoinHandleInner { fn abort_handle(&self) -> AbortHandle { match self { #[cfg(feature = "tokio")] - Self::Tokio(Some(handle)) => AbortHandle { + Self::Tokio(handle) => AbortHandle { inner: AbortHandleInner::Tokio(handle.abort_handle()), }, #[cfg(feature = "simulation")] - Self::Simulation(Some(handle)) => AbortHandle { + Self::Simulation(handle) => AbortHandle { inner: AbortHandleInner::Simulation(handle.abort_handle()), }, - #[cfg(feature = "tokio")] - Self::Tokio(None) => panic!("runtime join handle aborted after detach"), - #[cfg(feature = "simulation")] - Self::Simulation(None) => panic!("runtime join handle aborted after detach"), - Self::Detached(_) => panic!("runtime join handle aborted after completion"), - } - } - - fn detach(&mut self) { - match self { - #[cfg(feature = "tokio")] - Self::Tokio(handle) => { - drop(handle.take()); - } - #[cfg(feature = "simulation")] - Self::Simulation(handle) => { - if let Some(handle) = handle.take() { - handle.detach(); - } - } - Self::Detached(_) => {} + Self::Detached(_) => unreachable!("abort_handle called on a completed handle"), } } fn poll_result(&mut self, cx: &mut Context<'_>) -> Poll> { match self { #[cfg(feature = "tokio")] - Self::Tokio(Some(handle)) => match Pin::new(handle).poll(cx) { + Self::Tokio(handle) => match Pin::new(handle).poll(cx) { Poll::Ready(Ok(output)) => Poll::Ready(Ok(output)), Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { inner: JoinErrorInner::Tokio(err), @@ -146,18 +126,14 @@ impl JoinHandleInner { Poll::Pending => Poll::Pending, }, #[cfg(feature = "simulation")] - Self::Simulation(Some(handle)) => match Pin::new(handle).poll_join(cx) { + Self::Simulation(handle) => match Pin::new(handle).poll_join(cx) { Poll::Ready(Ok(output)) => Poll::Ready(Ok(output)), Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { inner: JoinErrorInner::Simulation(err), })), Poll::Pending => Poll::Pending, }, - #[cfg(feature = "tokio")] - Self::Tokio(None) => panic!("runtime join handle polled after 
detach"), - #[cfg(feature = "simulation")] - Self::Simulation(None) => panic!("runtime join handle polled after detach"), - Self::Detached(_) => panic!("runtime join handle polled after completion"), + Self::Detached(_) => unreachable!("poll_result called on a completed handle"), } } } @@ -166,15 +142,6 @@ impl JoinHandle { pub fn abort_handle(&self) -> AbortHandle { self.inner.abort_handle() } - - pub fn detach(mut self) { - self.detach_inner(); - } - - fn detach_inner(&mut self) { - self.inner.detach(); - self.inner = JoinHandleInner::Detached(PhantomData); - } } impl Future for JoinHandle { @@ -196,7 +163,14 @@ impl Future for JoinHandle { impl Drop for JoinHandle { fn drop(&mut self) { - self.detach_inner(); + let inner = core::mem::replace(&mut self.inner, JoinHandleInner::Detached(PhantomData)); + #[cfg(feature = "simulation")] + if let JoinHandleInner::Simulation(handle) = inner { + handle.detach(); + return; + } + // For Tokio (and Detached), dropping the handle does not cancel the task. 
+ drop(inner); } } @@ -233,17 +207,17 @@ impl Handle { } impl Handle { - pub fn spawn(&self, future: impl Future + Send + 'static) -> JoinHandle<()> { + pub fn spawn(&self, future: impl Future + Send + 'static) -> JoinHandle { #[cfg(not(any(feature = "tokio", feature = "simulation")))] let _ = future; match self { #[cfg(feature = "tokio")] Self::Tokio(handle) => JoinHandle { - inner: JoinHandleInner::Tokio(Some(handle.spawn(future))), + inner: JoinHandleInner::Tokio(handle.spawn(future)), }, #[cfg(feature = "simulation")] Self::Simulation(handle) => JoinHandle { - inner: JoinHandleInner::Simulation(Some(handle.spawn_on(sim::NodeId::MAIN, future))), + inner: JoinHandleInner::Simulation(handle.spawn_on(sim::NodeId::MAIN, future)), }, #[cfg(not(any(feature = "tokio", feature = "simulation")))] _ => unreachable!("runtime dispatch has no enabled backend"), @@ -299,3 +273,69 @@ impl Handle { } } } + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }; + + + #[cfg(feature = "simulation")] + #[test] + fn dropping_joinhandle_does_not_cancel_task_in_simulation() { + use crate::sim::Runtime; + let mut rt = Runtime::new(4); + let handle = Handle::simulation(rt.handle()); + let flag = Arc::new(AtomicBool::new(false)); + let flag_clone = flag.clone(); + + rt.block_on(async { + let jh = handle.spawn(async move { + flag_clone.store(true, Ordering::Release); + }); + drop(jh); + + // Yield so the spawned task gets polled. 
+ handle + .timeout(std::time::Duration::from_millis(50), async {}) + .await + .ok(); + }); + + assert!(flag.load(Ordering::Acquire)); + } + + #[cfg(feature = "simulation")] + #[test] + fn abort_cancels_task_in_simulation() { + use crate::sim::Runtime; + let mut rt = Runtime::new(4); + let handle = Handle::simulation(rt.handle()); + let flag = Arc::new(AtomicBool::new(false)); + let flag_clone = flag.clone(); + let handle_for_spawn = handle.clone(); + + rt.block_on(async move { + let jh = handle.spawn(async move { + // Sleep long enough that abort fires first. + handle_for_spawn + .timeout(std::time::Duration::from_millis(100), async {}) + .await + .ok(); + flag_clone.store(true, Ordering::Release); + }); + jh.abort_handle().abort(); + + let result = jh.await; + // wait to see, above task indeed cancelled. + let _ = handle + .timeout(std::time::Duration::from_millis(500), async {}) + .await; + assert!(result.is_err()); + assert!(!flag.load(Ordering::Acquire)); + }); + } +} diff --git a/crates/runtime/src/sim_std.rs b/crates/runtime/src/sim_std.rs index cbdcd299b49..08f82b9495e 100644 --- a/crates/runtime/src/sim_std.rs +++ b/crates/runtime/src/sim_std.rs @@ -139,8 +139,10 @@ unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc: #[unsafe(no_mangle)] #[inline(never)] unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { - eprintln!("warning: randomness requested; delegating to host OS"); - eprintln!("{}", std::backtrace::Backtrace::force_capture()); + if in_simulation() { + eprintln!("warning: randomness requested; delegating to host OS"); + eprintln!("{}", std::backtrace::Backtrace::force_capture()); + } unsafe { real_getrandom()(buf, buflen, flags) } } From 74b283c4fa9aefb65d466b8117cfd25fb6b2bf70 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 17:54:11 +0530 Subject: [PATCH 69/74] update readme --- crates/runtime/README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff 
--git a/crates/runtime/README.md b/crates/runtime/README.md index 4fece270157..18bb2289113 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -1,3 +1,5 @@ +> Welcome to the Matrix! + # spacetimedb-runtime `spacetimedb-runtime` is a runtime boundary that lets SpacetimeDB core code run under deterministic simulation testing (DST). @@ -39,10 +41,6 @@ Feature flags: - `tokio`: enables the Tokio runtime backend and remains in the default feature set. - `simulation`: enables the deterministic simulation runtime and `sim_std` helpers. -## Related documents - -- **[DETERMINISM_COVERAGE.md](./DETERMINISM_COVERAGE.md)** — tracks nondeterminism surfaces. - ## Design Principles - **Single-threaded runtime.** The simulator exposes interleaving and timeout bugs, but not bugs that require true parallel execution. The direction is to keep deep-core code single-threaded or close to thread-per-core; simulating real parallelism is out of scope. From 15b98a0cdfbd780050a9e3f4796263ede12d4c10 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 18:00:51 +0530 Subject: [PATCH 70/74] README about blcoking code --- crates/runtime/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/runtime/README.md b/crates/runtime/README.md index 18bb2289113..68037d752bf 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -56,6 +56,6 @@ Feature flags: - **One shared virtual clock.** All simulated nodes share a single clock. This masks bugs related to timing mismatch across machines. -- **`spawn_blocking` is only a facade on the simulation backend.** It delegates to a normal spawned task, so the closure still runs on the single executor thread and can block runtime progress. The direction is to avoid relying on blocking-pool semantics. +- **No good alternative for blocking APIs.** The simulation backend has no `spawn_blocking` pool or OS thread escape hatch. 
APIs like `spawn_blocking` or `Handle::block_on` delegate to the single executor thread, so blocking inside them stalls all simulated tasks. The direction is to avoid relying on blocking semantics inside the simulation boundary. - **OS randomness is not controlled.** `sim_std` warns if code reaches OS entropy. The direction is to keep application code and testing harnesses off OS randomness entirely. From 3c525f22b5614d4a181ddcab478848bc73ccee9e Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 18:07:48 +0530 Subject: [PATCH 71/74] comment --- .../src/sim/{executor.rs => executor/mod.rs} | 0 crates/runtime/src/sim/time/mod.rs | 3 +++ crates/runtime/src/sim/time/sleep.rs | 19 +++++++++++++++---- 3 files changed, 18 insertions(+), 4 deletions(-) rename crates/runtime/src/sim/{executor.rs => executor/mod.rs} (100%) diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor/mod.rs similarity index 100% rename from crates/runtime/src/sim/executor.rs rename to crates/runtime/src/sim/executor/mod.rs diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index de0a201d9f2..f8bf3571cf2 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -9,6 +9,9 @@ pub use sleep::Sleep; /// Shared virtual clock and timer registry for one simulation runtime. /// +/// Virtual clock that only advances when explicitly driven — no wall-clock +/// progression, like Tokio's time-pause mode. +/// /// All cloned handles observe the same virtual `now`, pending timers, and /// timer-id sequence. 
The executor uses this handle both for explicit /// time-travel operations and for jumping directly to the next pending timer diff --git a/crates/runtime/src/sim/time/sleep.rs b/crates/runtime/src/sim/time/sleep.rs index 538439018b7..53d5555ffc3 100644 --- a/crates/runtime/src/sim/time/sleep.rs +++ b/crates/runtime/src/sim/time/sleep.rs @@ -10,10 +10,21 @@ use super::{TimeHandle, TimerId}; /// Future returned by [`TimeHandle::sleep`]. /// -/// The future stores a relative duration until first poll, then converts that -/// into an absolute deadline and a stable timer id. Subsequent polls either -/// complete immediately if virtual time has already reached the deadline or -/// refresh the registered waker and remain pending. +/// Three-state machine: +/// +/// 1. **Unregistered** — first poll. Converts the relative `duration` into an +/// absolute `deadline` using the current virtual time and registers with the +/// time handle's timer table. Transitions to `Registered`. +/// +/// 2. **Registered** — subsequent polls. If virtual time has reached the +/// deadline, the timer is cancelled and the future returns `Ready`. +/// Otherwise, the waker is refreshed in the timer entry and the future +/// returns `Pending`. +/// +/// 3. **Done** — any later poll returns `Ready(())` immediately. +/// +/// On drop while `Registered`, the timer entry is cancelled to prevent stale +/// wakers from firing after the future is abandoned. 
pub struct Sleep { duration: Duration, state: SleepState, From 77ebb411ad2007bce732b33f1bc78a4429e6ef84 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 18:36:02 +0530 Subject: [PATCH 72/74] executor split --- crates/runtime/src/lib.rs | 7 +- crates/runtime/src/sim/executor/mod.rs | 111 +--------------- crates/runtime/src/sim/executor/task.rs | 162 ++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 111 deletions(-) create mode 100644 crates/runtime/src/sim/executor/task.rs diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index dfc96a3c870..5611d5db3ed 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -282,7 +282,6 @@ mod tests { Arc, }; - #[cfg(feature = "simulation")] #[test] fn dropping_joinhandle_does_not_cancel_task_in_simulation() { @@ -320,7 +319,6 @@ mod tests { rt.block_on(async move { let jh = handle.spawn(async move { - // Sleep long enough that abort fires first. handle_for_spawn .timeout(std::time::Duration::from_millis(100), async {}) .await @@ -330,10 +328,7 @@ mod tests { jh.abort_handle().abort(); let result = jh.await; - // wait to see, above task indeed cancelled. - let _ = handle - .timeout(std::time::Duration::from_millis(500), async {}) - .await; + let _ = handle.timeout(std::time::Duration::from_millis(500), async {}).await; assert!(result.is_err()); assert!(!flag.load(Ordering::Acquire)); }); diff --git a/crates/runtime/src/sim/executor/mod.rs b/crates/runtime/src/sim/executor/mod.rs index c064bca804f..0b874be8afe 100644 --- a/crates/runtime/src/sim/executor/mod.rs +++ b/crates/runtime/src/sim/executor/mod.rs @@ -12,6 +12,10 @@ use spin::Mutex; use crate::sim::{time::TimeHandle, Rng}; +mod task; +pub use task::{AbortHandle, JoinError, JoinHandle}; +use task::Abortable; + type Runnable = async_task::Runnable; #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -340,105 +344,6 @@ impl Handle { } } -/// A spawned simulated task. 
-pub struct JoinHandle { - task: async_task::Task, NodeId>, - abort: AbortHandle, -} - -impl JoinHandle { - /// Return a handle that can cancel this task without consuming the join - /// handle. - pub fn abort_handle(&self) -> AbortHandle { - self.abort.clone() - } - - /// Detach the task so it continues running without awaiting its output. - pub fn detach(self) { - self.task.detach(); - } - - pub(crate) fn poll_join(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - Pin::new(&mut self.task).poll(cx) - } -} - -impl Future for JoinHandle { - type Output = Result; - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - self.as_mut().poll_join(cx) - } -} - -#[derive(Clone)] -pub struct AbortHandle { - state: Arc, -} - -impl AbortHandle { - pub fn abort(&self) { - self.state.aborted.store(true, Ordering::Relaxed); - if let Some(waker) = self.state.waker.lock().take() { - waker.wake(); - } - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct JoinError; - -impl fmt::Display for JoinError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("task was cancelled") - } -} - -#[cfg(feature = "simulation")] -impl std::error::Error for JoinError {} - -struct AbortState { - aborted: AtomicBool, - waker: Mutex>, -} - -impl AbortState { - fn new() -> Self { - Self { - aborted: AtomicBool::new(false), - waker: Mutex::new(None), - } - } -} - -struct Abortable { - future: F, - abort: AbortHandle, -} - -impl Abortable { - fn new(future: F, abort: AbortHandle) -> Self { - Self { future, abort } - } -} - -impl Future for Abortable { - type Output = Result; - - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - if self.abort.state.aborted.load(Ordering::Relaxed) { - return Poll::Ready(Err(JoinError)); - } - - self.abort.state.waker.lock().replace(cx.waker().clone()); - - // SAFETY: the wrapper never moves `future` after being pinned. Only the - // cancellation fields outside `future` are accessed normally. 
- let mut future = unsafe { self.map_unchecked_mut(|this| &mut this.future) }; - future.as_mut().poll(cx).map(Ok) - } -} - /// Core single-threaded scheduler backing a simulation [`Runtime`]. /// /// The executor owns the runnable queue, per-node pause state, deterministic @@ -533,9 +438,7 @@ impl Executor { { self.assert_known_node(node); - let abort = AbortHandle { - state: Arc::new(AbortState::new()), - }; + let abort = AbortHandle::new(); let abortable = Abortable::new(future, abort.clone()); let sender = self.sender.clone(); let (runnable, task) = async_task::Builder::new() @@ -554,9 +457,7 @@ impl Executor { { self.assert_known_node(node); - let abort = AbortHandle { - state: Arc::new(AbortState::new()), - }; + let abort = AbortHandle::new(); let abortable = Abortable::new(future, abort.clone()); let sender = self.sender.clone(); let (runnable, task) = unsafe { diff --git a/crates/runtime/src/sim/executor/task.rs b/crates/runtime/src/sim/executor/task.rs new file mode 100644 index 00000000000..d98ad3d8348 --- /dev/null +++ b/crates/runtime/src/sim/executor/task.rs @@ -0,0 +1,162 @@ +use alloc::sync::Arc; +use core::{ + fmt, + future::Future, + pin::Pin, + sync::atomic::{AtomicBool, Ordering}, + task::{Context, Poll, Waker}, +}; + +use spin::Mutex; + +use super::NodeId; + +/// A spawned simulated task. +/// +/// Two handles reference the same underlying allocation: +/// - `JoinHandle` awaits the output and holds an `AbortHandle` for cancellation. +/// - The executor holds the `Runnable` (not visible here). +pub struct JoinHandle { + // async_task::Task owns a shared heap-allocated cell that holds the future, + // its output, metadata (NodeId), and waker. Polling it drives the future + // to completion. Dropping it without detach cancels the future. + pub(crate) task: async_task::Task, NodeId>, + // Clone of the same AbortHandle that Abortable holds inside the task. 
+ pub(crate) abort: AbortHandle, +} + +impl JoinHandle { + /// Return a handle that can cancel this task. + pub fn abort_handle(&self) -> AbortHandle { + self.abort.clone() + } + + /// Drop the join handle without cancelling the task. + pub fn detach(self) { + // async_task::Task::detach makes Drop a no-op — the future keeps running. + self.task.detach(); + } + + /// Poll the underlying async_task::Task for its output. + pub(crate) fn poll_join( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + // async_task::Task implements Future. Polling it drives the wrapped + // Abortable future inside the executor. + Pin::new(&mut self.task).poll(cx) + } +} + +impl Future for JoinHandle { + type Output = Result; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + self.as_mut().poll_join(cx) + } +} + +/// Two-phase cancellation for a simulated task. +/// +/// [`AbortHandle`] and [`Abortable`] work together: +/// - `abort()` sets an atomic flag and wakes the task so it gets polled. +/// - On the next poll, `Abortable` checks the flag and returns `Err(JoinError)`. +/// - `JoinHandle::poll` reads that error and surfaces it to the awaiting code. +/// - The task's future is dropped naturally when `Abortable` returns `Err`. +/// +/// `abort()` is thread-safe — it can be called from any task or node, and the +/// waker ensures the target task is re-scheduled even if it was blocked on I/O +/// or a timer. +#[derive(Clone)] +pub struct AbortHandle { + state: Arc, +} + +impl AbortHandle { + pub(crate) fn new() -> Self { + Self { + state: Arc::new(AbortState::new()), + } + } + + pub fn abort(&self) { + // Step 1: atomically mark the task as aborted. + self.state.aborted.store(true, Ordering::Relaxed); + // Step 2: wake the task so the executor re-schedules it for polling. + // If the task is blocked on a timer, the waker cancels that wait. 
+ if let Some(waker) = self.state.waker.lock().take() { + waker.wake(); + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct JoinError; + +impl fmt::Display for JoinError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("task was cancelled") + } +} + +#[cfg(feature = "simulation")] +impl std::error::Error for JoinError {} + +// Shared state between AbortHandle and Abortable. +struct AbortState { + // Set to true by AbortHandle::abort(), read by Abortable::poll(). + aborted: AtomicBool, + // The executor's waker, registered by Abortable on every poll. + // Stored so abort() can wake the task even if it's waiting on I/O. + waker: Mutex>, +} + +impl AbortState { + fn new() -> Self { + Self { + aborted: AtomicBool::new(false), + waker: Mutex::new(None), + } + } +} + +/// Wraps a future so it can be cancelled via an [`AbortHandle`]. +/// +/// The executor wraps every spawned future in `Abortable`. On each poll it +/// checks the cancellation flag before progressing the inner future. +pub(crate) struct Abortable { + future: F, + abort: AbortHandle, +} + +impl Abortable { + pub(crate) fn new(future: F, abort: AbortHandle) -> Self { + Self { future, abort } + } +} + +impl Future for Abortable { + type Output = Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + // Check cancellation before doing any work. + if self.abort.state.aborted.load(Ordering::Relaxed) { + return Poll::Ready(Err(JoinError)); + } + + // Register the waker so abort() can wake this task. + self.abort.state.waker.lock().replace(cx.waker().clone()); + + // SAFETY: The `Abortable` struct is `#[repr(transparent)]`-like in its + // pin projection: `future` is behind the cancellation fields (`abort`) + // that are never moved once pinned. We use `map_unchecked_mut` to project + // through the struct layout, which is safe because: + // 1. `future` is a direct field of `Abortable` — no indirection. + // 2. 
`abort` is never moved or modified in ways that would change the + // address of `future` relative to `self`. + // 3. The caller guarantees `self` stays pinned for the lifetime of the + // future. + let mut future = unsafe { self.map_unchecked_mut(|this| &mut this.future) }; + future.as_mut().poll(cx).map(Ok) + } +} From 1a498927f5e412b6e8d6046101c832018554b66a Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 18 May 2026 20:41:58 +0530 Subject: [PATCH 73/74] streamline properties --- Cargo.lock | 2 - crates/dst/Cargo.toml | 4 +- crates/dst/proptest-regressions/datastore.txt | 7 - crates/dst/src/config.rs | 39 +- crates/dst/src/core/mod.rs | 5 +- crates/dst/src/lib.rs | 16 +- crates/dst/src/main.rs | 33 +- crates/dst/src/properties.rs | 53 +- crates/dst/src/properties/rules.rs | 190 +-- crates/dst/src/properties/runtime.rs | 71 +- crates/dst/src/schema.rs | 8 +- crates/dst/src/seed.rs | 52 - crates/dst/src/sim/commitlog.rs | 59 +- crates/dst/src/sim/mod.rs | 38 +- crates/dst/src/sim/snapshot.rs | 74 +- crates/dst/src/sim/storage_faults.rs | 196 ++- crates/dst/src/sim/time.rs | 12 +- crates/dst/src/targets/descriptor.rs | 55 +- crates/dst/src/targets/mod.rs | 1 - .../src/targets/relational_db_commitlog.rs | 1360 +++-------------- .../src/targets/relational_db_concurrent.rs | 1045 ------------- .../src/workload/commitlog_ops/generation.rs | 279 ---- crates/dst/src/workload/commitlog_ops/mod.rs | 11 - .../dst/src/workload/commitlog_ops/types.rs | 169 -- crates/dst/src/workload/mod.rs | 3 +- crates/dst/src/workload/strategy.rs | 24 +- .../dst/src/workload/table_ops/generation.rs | 37 +- crates/dst/src/workload/table_ops/mod.rs | 2 - crates/dst/src/workload/table_ops/model.rs | 12 +- .../workload/table_ops/scenarios/banking.rs | 108 -- .../src/workload/table_ops/scenarios/mod.rs | 49 +- .../table_ops/scenarios/random_crud.rs | 38 +- .../dst/src/workload/table_ops/strategies.rs | 8 +- crates/dst/src/workload/table_ops/types.rs | 4 +- 34 files changed, 573 
insertions(+), 3491 deletions(-) delete mode 100644 crates/dst/proptest-regressions/datastore.txt delete mode 100644 crates/dst/src/seed.rs delete mode 100644 crates/dst/src/targets/relational_db_concurrent.rs delete mode 100644 crates/dst/src/workload/commitlog_ops/generation.rs delete mode 100644 crates/dst/src/workload/commitlog_ops/mod.rs delete mode 100644 crates/dst/src/workload/commitlog_ops/types.rs delete mode 100644 crates/dst/src/workload/table_ops/scenarios/banking.rs diff --git a/Cargo.lock b/Cargo.lock index b14b43e4db2..84097cd7086 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8206,14 +8206,12 @@ dependencies = [ "spacetimedb-datastore", "spacetimedb-durability", "spacetimedb-lib 2.2.0", - "spacetimedb-paths", "spacetimedb-primitives 2.2.0", "spacetimedb-runtime", "spacetimedb-sats 2.2.0", "spacetimedb-schema", "spacetimedb-snapshot", "spacetimedb-table", - "tempfile", "tracing", "tracing-subscriber", ] diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index add6ccd36ad..c3e2b3ea519 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -23,13 +23,11 @@ spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = " spacetimedb-commitlog = { workspace = true, features = ["test"] } spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.2.0", features = ["test"] } spacetimedb-lib.workspace = true -spacetimedb-paths.workspace = true +spacetimedb-snapshot.workspace = true spacetimedb-primitives.workspace = true spacetimedb-runtime = { workspace = true, features = ["simulation"] } spacetimedb-sats.workspace = true spacetimedb-schema = { workspace = true, features = ["test"] } -spacetimedb-snapshot.workspace = true spacetimedb-table.workspace = true -tempfile.workspace = true tracing.workspace = true tracing-subscriber.workspace = true diff --git a/crates/dst/proptest-regressions/datastore.txt b/crates/dst/proptest-regressions/datastore.txt deleted file mode 100644 index 
a76f311290a..00000000000 --- a/crates/dst/proptest-regressions/datastore.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Seeds for failure cases proptest has generated in the past. It is -# automatically read and these particular cases re-run before any -# novel cases are generated. -# -# It is recommended to check this file in to source control so that -# everyone who runs the test benefits from these saved cases. -cc d9b364a151c583c83224b9ddcc17de730b057b77c5509c8433e8dc12514d2415 # shrinks to seed = 0 diff --git a/crates/dst/src/config.rs b/crates/dst/src/config.rs index 5968c5abb96..1f37e217fb8 100644 --- a/crates/dst/src/config.rs +++ b/crates/dst/src/config.rs @@ -1,31 +1,24 @@ //! Shared run-budget configuration for DST targets. -use std::{ - fmt, - time::{Duration, Instant}, -}; +use std::time::{Duration, Instant}; -/// Coarse disk-fault profile for commitlog-backed DST targets. -#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] -pub enum CommitlogFaultProfile { +/// Storage fault-injection profile for commitlog and snapshot wrappers. +/// +/// These are not CLI options yet; they are programmatic knobs for targeted +/// fault-injection tests. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub(crate) enum CommitlogFaultProfile { + /// No faults injected regardless of buggify state. Off, + /// Low probability latency and short I/O only. Light, + /// Moderate-latency and short I/O only. #[default] Default, + /// Heavy-latency and short I/O only. Aggressive, } -impl fmt::Display for CommitlogFaultProfile { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Off => f.write_str("off"), - Self::Light => f.write_str("light"), - Self::Default => f.write_str("default"), - Self::Aggressive => f.write_str("aggressive"), - } - } -} - /// Common stop conditions for generated DST runs. #[derive(Clone, Debug, Eq, PartialEq)] pub struct RunConfig { @@ -41,8 +34,6 @@ pub struct RunConfig { /// with host speed and runtime behavior. 
Use `max_interactions` when a /// failure needs precise replay. pub max_duration_ms: Option, - /// Disk-fault profile for commitlog-backed targets. - pub commitlog_fault_profile: CommitlogFaultProfile, } impl Default for RunConfig { @@ -50,7 +41,6 @@ impl Default for RunConfig { Self { max_interactions: None, max_duration_ms: None, - commitlog_fault_profile: CommitlogFaultProfile::Default, } } } @@ -60,7 +50,6 @@ impl RunConfig { Self { max_interactions: Some(max_interactions), max_duration_ms: None, - ..Default::default() } } @@ -68,15 +57,9 @@ impl RunConfig { Ok(Self { max_interactions: None, max_duration_ms: Some(parse_duration_spec(duration)?.as_millis() as u64), - ..Default::default() }) } - pub fn with_commitlog_fault_profile(mut self, profile: CommitlogFaultProfile) -> Self { - self.commitlog_fault_profile = profile; - self - } - /// Return the wall-clock deadline for duration-budgeted runs. /// /// This intentionally uses `std::time::Instant`, not simulated time. DST diff --git a/crates/dst/src/core/mod.rs b/crates/dst/src/core/mod.rs index 3920471971c..400c132a35f 100644 --- a/crates/dst/src/core/mod.rs +++ b/crates/dst/src/core/mod.rs @@ -54,9 +54,6 @@ where E: TargetEngine, P: StreamingProperties, { - // Duration is a harness-level wall-clock stop condition. The reproducible - // budget for exact replay is `RunConfig::max_interactions`, which the - // source uses when it is constructed. 
let deadline = cfg.deadline(); let mut step = 0usize; loop { @@ -249,7 +246,7 @@ mod tests { } fn assert_not_crash_error(phase: PanicPhase, expected_phase: &str, expected_payload: &str) { - let mut runtime = crate::sim::Runtime::new(crate::seed::DstSeed(0)).expect("runtime"); + let mut runtime = crate::sim::Runtime::new(0).expect("runtime"); let err = runtime .block_on(run_streaming( SingleStepSource::new(), diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs index 5463186a8b9..cfebd1a113d 100644 --- a/crates/dst/src/lib.rs +++ b/crates/dst/src/lib.rs @@ -5,22 +5,20 @@ //! - [`client`] for logical client/session identifiers, //! - [`config`] for run budgets, //! - [`properties`] for reusable semantic checks, -//! - [`seed`] for deterministic seeds, //! - [`workload`] for scenario identifiers, -//! - [`targets`] for the executable relational-db + commitlog adapter. +//! - [`targets`] for the executable relational-db adapter. //! //! ## DST principles //! -//! 1. Every generated choice comes from [`seed::DstSeed`] or a simulator-provided -//! deterministic source. A failing run should be replayable from the printed -//! seed and CLI arguments. Use `--max-interactions` for exact replay; duration -//! budgets are wall-clock soak limits. +//! 1. Every generated choice comes from a simulator-provided deterministic +//! source. A failing run should be replayable from the printed seed and CLI +//! arguments. Use `--max-interactions` for exact replay; duration budgets are +//! wall-clock soak limits. //! 2. Workloads describe legal but stressful user behavior. Targets may add //! faults and lifecycle disruption, but the generator should not depend on //! target internals. //! 3. Oracles should check observable state, not merely absence of panics. When -//! possible, compare the target against a simple model or a replayed durable -//! history. +//! possible, compare the target against a simple model. //! 4. Keep generation, execution, and property checking separate. 
This makes it //! clear whether a failure came from an invalid workload, a target bug, or a //! weak assertion. @@ -41,8 +39,6 @@ pub mod core; /// Reusable semantic properties and oracle-model checks. pub(crate) mod properties; mod schema; -/// Stable seed and RNG utilities used to make runs reproducible. -pub mod seed; /// Local executor and deterministic-decision shim. pub mod sim; /// Concrete simulator targets. diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index 53e368adb92..b957c4fb0c4 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -3,8 +3,8 @@ use std::time::{SystemTime, UNIX_EPOCH}; use clap::{Args, Parser, Subcommand}; use spacetimedb_dst::{ config::RunConfig, - seed::DstSeed, - targets::descriptor::{RelationalDbConcurrentDescriptor, TargetDescriptor}, + targets::descriptor::{RelationalDbCommitlogDescriptor, TargetDescriptor}, + workload::table_ops::TableScenarioId, }; #[derive(Parser, Debug)] @@ -31,6 +31,8 @@ struct RunArgs { duration: Option, #[arg(long, help = "Deterministic interaction budget. 
Preferred for replayable failures.")] max_interactions: Option, + #[arg(long, help = "Scenario to run [default: random-crud]")] + scenario: Option, } fn main() -> anyhow::Result<()> { @@ -56,12 +58,13 @@ fn init_tracing() { fn run_command(args: RunArgs) -> anyhow::Result<()> { let seed = resolve_seed(args.seed); let config = build_config(args.duration.as_deref(), args.max_interactions)?; + let scenario = resolve_scenario(args.scenario.as_deref()); - run_prepared_target::(seed, (), config) + run_prepared_target::(seed, scenario, config) } fn run_prepared_target( - seed: DstSeed, + seed: u64, scenario: D::Scenario, config: RunConfig, ) -> anyhow::Result<()> @@ -78,22 +81,30 @@ where .unwrap_or_else(|payload| std::panic::resume_unwind(payload)) } -fn resolve_seed(seed: Option) -> DstSeed { - seed.map(DstSeed).unwrap_or_else(|| { - let nanos = SystemTime::now() +fn resolve_seed(seed: Option) -> u64 { + seed.unwrap_or_else(|| { + SystemTime::now() .duration_since(UNIX_EPOCH) .expect("time went backwards") - .as_nanos() as u64; - DstSeed(nanos) + .as_nanos() as u64 }) } +fn resolve_scenario(scenario: Option<&str>) -> TableScenarioId { + match scenario { + Some("random-crud") | None => TableScenarioId::RandomCrud, + Some(other) => { + eprintln!("unknown scenario: {other}, using random-crud"); + TableScenarioId::RandomCrud + } + } +} + fn build_config(duration: Option<&str>, max_interactions: Option) -> anyhow::Result { Ok(match (duration, max_interactions) { (Some(duration), Some(max_interactions)) => RunConfig { max_interactions: Some(max_interactions), max_duration_ms: Some(spacetimedb_dst::config::parse_duration_spec(duration)?.as_millis() as u64), - ..Default::default() }, (Some(duration), None) => RunConfig::with_duration_spec(duration)?, (None, Some(max_interactions)) => RunConfig::with_max_interactions(max_interactions), @@ -103,7 +114,7 @@ fn build_config(duration: Option<&str>, max_interactions: Option) -> anyh #[allow(clippy::disallowed_macros)] async fn 
run_target( - seed: DstSeed, + seed: u64, scenario: D::Scenario, config: RunConfig, ) -> anyhow::Result<()> { diff --git a/crates/dst/src/properties.rs b/crates/dst/src/properties.rs index 11d652fcaec..dbe227c2dd9 100644 --- a/crates/dst/src/properties.rs +++ b/crates/dst/src/properties.rs @@ -11,22 +11,6 @@ //! target observations, target-visible state, oracle models, and final //! outcomes. Failures should include a stable property name and enough context //! to replay the seed or trace. -//! -//! The current catalog is intentionally small and falls into the same groups -//! used by the proposal: -//! -//! - Safety properties: `NotCrash`, `ErrorMatchesOracle`, -//! `NoMutationMatchesModel`, `DurableReplayMatchesModel`, -//! `SnapshotCaptureMaintainsPrefix`, `SnapshotRestoreWithinDurablePrefix`, -//! `BankingTablesMatch`, and `DynamicMigrationAutoInc`. -//! - Model/oracle properties: `PointLookupMatchesModel`, -//! `PredicateCountMatchesModel`, `RangeScanMatchesModel`, -//! `FullScanMatchesModel`, and the scenario-specific final table-state check. -//! - Differential and metamorphic properties: `InsertSelect`, `DeleteSelect`, -//! `SelectSelectOptimizer`, `WhereTrueFalseNull`, and `IndexRangeExcluded`. -//! - Coverage and progress properties are not first-class rules yet. For now, -//! targets expose operation and outcome counters. Those counters should become -//! selectable properties once long-running and replication targets need them. 
mod rules; mod runtime; @@ -38,10 +22,7 @@ use spacetimedb_sats::AlgebraicValue; use crate::{ client::SessionId, schema::{SchemaPlan, SimRow}, - workload::{ - commitlog_ops::{DurableReplaySummary, SnapshotObservation}, - table_ops::{TableErrorKind, TableWorkloadInteraction, TableWorkloadOutcome}, - }, + workload::table_ops::{TableErrorKind, TableWorkloadInteraction, TableWorkloadOutcome}, }; pub(crate) use runtime::PropertyRuntime; @@ -80,16 +61,6 @@ pub(crate) enum PropertyKind { WhereTrueFalseNull, /// Metamorphic: composite index range scans implement excluded upper bounds correctly. IndexRangeExcluded, - /// Safety: banking scenario debit and credit shadow tables remain identical. - BankingTablesMatch, - /// Safety: auto-increment IDs continue advancing after dynamic table migration. - DynamicMigrationAutoInc, - /// Safety: durable replay state equals the oracle committed model. - DurableReplayMatchesModel, - /// Safety: failed snapshot capture does not publish a newer usable snapshot. - SnapshotCaptureMaintainsPrefix, - /// Safety: restored snapshots are within the durable prefix. - SnapshotRestoreWithinDurablePrefix, /// Safety: observed errors match the model-predicted error class. ErrorMatchesOracle, /// Safety: model-predicted no-op interactions do not mutate visible state. 
@@ -104,15 +75,6 @@ pub(crate) enum PropertyKind { FullScanMatchesModel, } -#[derive(Clone, Debug)] -pub(crate) struct DynamicMigrationProbe { - pub slot: u32, - pub from_version: u32, - pub to_version: u32, - pub existing_rows: Vec, - pub inserted_row: SimRow, -} - #[derive(Clone, Debug)] pub(crate) enum TableMutation { Inserted { @@ -164,16 +126,6 @@ pub(crate) enum TableObservation { CommitOrRollback, } -#[derive(Clone, Debug)] -pub(crate) enum CommitlogObservation { - Table(TableObservation), - Applied, - Skipped, - DynamicMigrationProbe(DynamicMigrationProbe), - Snapshot(SnapshotObservation), - DurableReplay(DurableReplaySummary), -} - struct PropertyContext<'a> { access: &'a dyn TargetPropertyAccess, models: &'a runtime::PropertyModels, @@ -232,8 +184,5 @@ enum PropertyEvent<'a> { actual: &'a [SimRow], }, CommitOrRollback, - DynamicMigrationProbe(&'a DynamicMigrationProbe), - SnapshotCapture(&'a SnapshotObservation), - DurableReplay(&'a DurableReplaySummary), TableWorkloadFinished(&'a TableWorkloadOutcome), } diff --git a/crates/dst/src/properties/rules.rs b/crates/dst/src/properties/rules.rs index cb3f5bfc5d9..9d2552014c2 100644 --- a/crates/dst/src/properties/rules.rs +++ b/crates/dst/src/properties/rules.rs @@ -5,15 +5,12 @@ use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; use crate::{ client::SessionId, schema::{SchemaPlan, SimRow}, - workload::{ - commitlog_ops::SnapshotCaptureStatus, - table_ops::{TableOperation, TableScenario}, - }, + workload::table_ops::{TableOperation, TableScenario}, }; -use super::{PropertyContext, PropertyEvent, PropertyKind, TableMutation, TableObservation, TargetPropertyAccess}; +use super::{PropertyContext, PropertyEvent, PropertyKind, TableMutation, TableObservation}; -pub(super) trait PropertyRule { +pub(crate) trait PropertyRule { fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { let _ = ctx; let _ = event; @@ -29,11 +26,6 @@ pub(super) fn rule_for_kind(kind: 
PropertyKind) -> Box { PropertyKind::SelectSelectOptimizer => Box::::default(), PropertyKind::WhereTrueFalseNull => Box::::default(), PropertyKind::IndexRangeExcluded => Box::::default(), - PropertyKind::BankingTablesMatch => Box::::default(), - PropertyKind::DynamicMigrationAutoInc => Box::::default(), - PropertyKind::DurableReplayMatchesModel => Box::::default(), - PropertyKind::SnapshotCaptureMaintainsPrefix => Box::::default(), - PropertyKind::SnapshotRestoreWithinDurablePrefix => Box::::default(), PropertyKind::ErrorMatchesOracle => Box::::default(), PropertyKind::NoMutationMatchesModel => Box::::default(), PropertyKind::PointLookupMatchesModel => Box::::default(), @@ -43,7 +35,7 @@ pub(super) fn rule_for_kind(kind: PropertyKind) -> Box { } } -pub(super) fn oracle_table_state_rule(scenario: S, schema: SchemaPlan) -> Box +pub(crate) fn oracle_table_state_rule(scenario: S, schema: SchemaPlan) -> Box where S: TableScenario + 'static, { @@ -297,154 +289,6 @@ impl PropertyRule for IndexRangeExcludedRule { } } -#[derive(Default)] -struct BankingMatchRule; - -impl PropertyRule for BankingMatchRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - match event { - PropertyEvent::RowInserted { in_tx: false, .. } - | PropertyEvent::RowDeleted { in_tx: false, .. 
} - | PropertyEvent::CommitOrRollback => check_banking_tables_match(ctx.access), - _ => Ok(()), - } - } -} - -#[derive(Default)] -struct DynamicMigrationAutoIncRule; - -impl PropertyRule for DynamicMigrationAutoIncRule { - fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::DynamicMigrationProbe(probe) = event else { - return Ok(()); - }; - let max_existing_id = probe - .existing_rows - .iter() - .filter_map(sim_row_integer_id) - .max() - .unwrap_or(0); - let inserted_id = sim_row_integer_id(&probe.inserted_row).ok_or_else(|| { - format!( - "[DynamicMigrationAutoInc] probe row missing integer id for slot={}, from_version={}, to_version={}: {:?}", - probe.slot, probe.from_version, probe.to_version, probe.inserted_row - ) - })?; - if inserted_id <= max_existing_id { - return Err(format!( - "[DynamicMigrationAutoInc] non-advancing id for slot={}, from_version={}, to_version={}: inserted_id={}, max_existing_id={}", - probe.slot, probe.from_version, probe.to_version, inserted_id, max_existing_id - )); - } - Ok(()) - } -} - -#[derive(Default)] -struct DurableReplayMatchesModelRule; - -impl PropertyRule for DurableReplayMatchesModelRule { - fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::DurableReplay(replay) = event else { - return Ok(()); - }; - let expected_rows = ctx.models.table().committed_rows(); - if replay.base_rows != expected_rows { - return Err(format!( - "[DurableReplayMatchesModel] replayed durable state mismatch at durable_offset {:?}, restored_snapshot {:?}: expected={expected_rows:?} actual={:?}", - replay.durable_offset, replay.restored_snapshot_offset, replay.base_rows - )); - } - Ok(()) - } -} - -#[derive(Default)] -struct SnapshotCaptureMaintainsPrefixRule; - -impl PropertyRule for SnapshotCaptureMaintainsPrefixRule { - fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), 
String> { - let PropertyEvent::SnapshotCapture(snapshot) = event else { - return Ok(()); - }; - - match snapshot.status { - SnapshotCaptureStatus::Captured { offset } => { - if snapshot.latest_after != Some(offset) { - return Err(format!( - "[SnapshotCaptureMaintainsPrefix] captured offset {offset}, but latest snapshot is {:?}: {snapshot:?}", - snapshot.latest_after - )); - } - let durable = snapshot.durable_offset.ok_or_else(|| { - format!( - "[SnapshotCaptureMaintainsPrefix] captured snapshot {offset} without a durable offset: {snapshot:?}" - ) - })?; - if offset > durable { - return Err(format!( - "[SnapshotCaptureMaintainsPrefix] captured snapshot {offset} beyond durable offset {durable}: {snapshot:?}" - )); - } - } - SnapshotCaptureStatus::SkippedInjectedFault => { - if snapshot.latest_after > snapshot.latest_before { - return Err(format!( - "[SnapshotCaptureMaintainsPrefix] injected snapshot fault published newer snapshot: before={:?}, after={:?}", - snapshot.latest_before, snapshot.latest_after - )); - } - } - SnapshotCaptureStatus::SkippedOpenTransaction | SnapshotCaptureStatus::SkippedNoSnapshotCreated => { - if snapshot.latest_after != snapshot.latest_before { - return Err(format!( - "[SnapshotCaptureMaintainsPrefix] skipped snapshot changed latest snapshot: before={:?}, after={:?}, status={:?}", - snapshot.latest_before, snapshot.latest_after, snapshot.status - )); - } - } - } - Ok(()) - } -} - -#[derive(Default)] -struct SnapshotRestoreWithinDurablePrefixRule; - -impl PropertyRule for SnapshotRestoreWithinDurablePrefixRule { - fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::DurableReplay(replay) = event else { - return Ok(()); - }; - let Some(snapshot_offset) = replay.restored_snapshot_offset else { - return Ok(()); - }; - let durable_offset = replay.durable_offset.ok_or_else(|| { - format!( - "[SnapshotRestoreWithinDurablePrefix] restored snapshot {snapshot_offset} without durable 
offset: {replay:?}" - ) - })?; - if snapshot_offset > durable_offset { - return Err(format!( - "[SnapshotRestoreWithinDurablePrefix] restored snapshot {snapshot_offset} beyond durable offset {durable_offset}: {replay:?}" - )); - } - if replay.latest_snapshot_offset == Some(snapshot_offset) { - return Ok(()); - } - if let Some(latest) = replay.latest_snapshot_offset - && latest <= durable_offset - && latest > snapshot_offset - { - return Err(format!( - "[SnapshotRestoreWithinDurablePrefix] restored snapshot {snapshot_offset}, but newer usable snapshot {latest} exists within durable offset {durable_offset}: {replay:?}" - )); - } - Ok(()) - } -} - #[derive(Default)] struct ErrorMatchesOracleRule; @@ -631,35 +475,9 @@ impl PropertyRule for FullScanMatchesModelRule { } } -fn check_banking_tables_match(access: &dyn TargetPropertyAccess) -> Result<(), String> { - let schema = access.schema_plan(); - let debit = schema.tables.iter().position(|table| table.name == "debit_accounts"); - let credit = schema.tables.iter().position(|table| table.name == "credit_accounts"); - let (Some(left), Some(right)) = (debit, credit) else { - return Ok(()); - }; - - let left_rows = access.collect_rows_for_table(left)?; - let right_rows = access.collect_rows_for_table(right)?; - if left_rows != right_rows { - return Err(format!( - "[Shadow::AllTableHaveExpectedContent] banking mismatch: debit={left_rows:?}, credit={right_rows:?}" - )); - } - Ok(()) -} - fn compare_rows_by_cols(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { lhs.project_key(cols) .to_algebraic_value() .cmp(&rhs.project_key(cols).to_algebraic_value()) .then_with(|| lhs.values.cmp(&rhs.values)) } - -fn sim_row_integer_id(row: &SimRow) -> Option { - match row.values.first() { - Some(AlgebraicValue::I64(value)) => Some(*value as i128), - Some(AlgebraicValue::U64(value)) => Some(*value as i128), - _ => None, - } -} diff --git a/crates/dst/src/properties/runtime.rs b/crates/dst/src/properties/runtime.rs index 
c6f67c26e3f..52951b10b17 100644 --- a/crates/dst/src/properties/runtime.rs +++ b/crates/dst/src/properties/runtime.rs @@ -6,19 +6,12 @@ use crate::{ client::SessionId, core::{StreamingProperties, TargetEngine}, schema::{SchemaPlan, SimRow}, - workload::{ - commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary, SnapshotObservation}, - table_ops::{ - PredictedOutcome, TableErrorKind, TableOracle, TableScenario, TableWorkloadInteraction, - TableWorkloadOutcome, - }, - }, + workload::table_ops::{PredictedOutcome, TableErrorKind, TableOracle, TableWorkloadInteraction, TableWorkloadOutcome}, }; use super::{ rules::{oracle_table_state_rule, rule_for_kind, PropertyRule}, - CommitlogObservation, DynamicMigrationProbe, PropertyContext, PropertyEvent, PropertyKind, TableMutation, - TableObservation, TargetPropertyAccess, + PropertyContext, PropertyEvent, PropertyKind, TableMutation, TableObservation, TargetPropertyAccess, }; #[derive(Clone, Debug)] @@ -107,7 +100,7 @@ impl PropertyRuntime { pub fn for_table_workload(scenario: S, schema: SchemaPlan, num_connections: usize) -> Self where - S: TableScenario + 'static, + S: crate::workload::table_ops::TableScenario + 'static, { let mut runtime = Self { models: PropertyModels::new(schema.tables.len(), num_connections), @@ -289,30 +282,6 @@ impl PropertyRuntime { self.observe_event(access, PropertyEvent::CommitOrRollback) } - fn on_dynamic_migration_probe( - &mut self, - access: &dyn TargetPropertyAccess, - probe: &DynamicMigrationProbe, - ) -> Result<(), String> { - self.observe_event(access, PropertyEvent::DynamicMigrationProbe(probe)) - } - - fn on_snapshot_capture( - &mut self, - access: &dyn TargetPropertyAccess, - snapshot: &SnapshotObservation, - ) -> Result<(), String> { - self.observe_event(access, PropertyEvent::SnapshotCapture(snapshot)) - } - - fn on_durable_replay( - &mut self, - access: &dyn TargetPropertyAccess, - replay: &DurableReplaySummary, - ) -> Result<(), String> { - 
self.observe_event(access, PropertyEvent::DurableReplay(replay)) - } - fn on_table_workload_finish( &mut self, access: &dyn TargetPropertyAccess, @@ -392,37 +361,26 @@ impl PropertyRuntime { } } -impl StreamingProperties for PropertyRuntime +impl StreamingProperties for PropertyRuntime where E: TargetEngine< - CommitlogInteraction, - Observation = CommitlogObservation, - Outcome = CommitlogWorkloadOutcome, + TableWorkloadInteraction, + Observation = TableObservation, + Outcome = TableWorkloadOutcome, Error = String, > + TargetPropertyAccess, { fn observe( &mut self, engine: &E, - interaction: &CommitlogInteraction, - observation: &CommitlogObservation, + interaction: &TableWorkloadInteraction, + observation: &TableObservation, ) -> Result<(), String> { - match (interaction, observation) { - (CommitlogInteraction::Table(table_interaction), CommitlogObservation::Table(table_observation)) => { - self.observe_table_observation(engine, table_interaction, table_observation) - } - (_, CommitlogObservation::DynamicMigrationProbe(probe)) => self.on_dynamic_migration_probe(engine, probe), - (_, CommitlogObservation::DurableReplay(replay)) => self.on_durable_replay(engine, replay), - (_, CommitlogObservation::Applied | CommitlogObservation::Skipped) => Ok(()), - (other, observation) => Err(format!( - "observation {observation:?} does not match interaction {other:?}" - )), - } + self.observe_table_observation(engine, interaction, observation) } - fn finish(&mut self, engine: &E, outcome: &CommitlogWorkloadOutcome) -> Result<(), String> { - self.on_durable_replay(engine, &outcome.replay)?; - self.on_table_workload_finish(engine, &outcome.table) + fn finish(&mut self, engine: &E, outcome: &TableWorkloadOutcome) -> Result<(), String> { + self.on_table_workload_finish(engine, outcome) } } @@ -445,11 +403,6 @@ impl Default for PropertyRuntime { PropertyKind::SelectSelectOptimizer, PropertyKind::WhereTrueFalseNull, PropertyKind::IndexRangeExcluded, - 
PropertyKind::BankingTablesMatch, - PropertyKind::DynamicMigrationAutoInc, - PropertyKind::DurableReplayMatchesModel, - PropertyKind::SnapshotCaptureMaintainsPrefix, - PropertyKind::SnapshotRestoreWithinDurablePrefix, PropertyKind::ErrorMatchesOracle, PropertyKind::NoMutationMatchesModel, PropertyKind::PointLookupMatchesModel, diff --git a/crates/dst/src/schema.rs b/crates/dst/src/schema.rs index ebce6c3a34b..fdaaa627954 100644 --- a/crates/dst/src/schema.rs +++ b/crates/dst/src/schema.rs @@ -2,7 +2,7 @@ use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; -use crate::seed::DstRng; +use crate::sim::Rng; /// Generated schema for one simulator case. #[derive(Clone, Debug, Eq, PartialEq)] @@ -41,7 +41,7 @@ pub struct SimRow { pub values: Vec, } -pub fn generate_supported_type(rng: &mut DstRng) -> AlgebraicType { +pub fn generate_supported_type(rng: &Rng) -> AlgebraicType { match rng.index(12) { 0 => AlgebraicType::Bool, 1 => AlgebraicType::I8, @@ -58,7 +58,7 @@ pub fn generate_supported_type(rng: &mut DstRng) -> AlgebraicType { } } -pub fn generate_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { +pub fn generate_value_for_type(rng: &Rng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { if rng.index(5) == 0 { return edge_value_for_type(rng, ty, idx); } @@ -127,7 +127,7 @@ pub fn distinct_value_for_type(ty: &AlgebraicType, current: &AlgebraicValue) -> } } -fn edge_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { +fn edge_value_for_type(rng: &Rng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { match ty { AlgebraicType::Bool => AlgebraicValue::Bool(rng.index(2) == 0), AlgebraicType::I8 => [i8::MIN, -1, 0, 1, i8::MAX][rng.index(5)].into(), diff --git a/crates/dst/src/seed.rs b/crates/dst/src/seed.rs deleted file mode 100644 index 669bb125dd3..00000000000 --- a/crates/dst/src/seed.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! Stable seed and RNG utilities used across DST runs. -//! 
-//! The important property here is repeatability, not statistical quality. -//! `DstSeed::fork` is used to derive independent substreams without requiring -//! callers to manually coordinate RNG state. - -/// Top-level seed value for a deterministic run. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] -pub struct DstSeed(pub u64); - -impl DstSeed { - pub(crate) fn fork(self, discriminator: u64) -> Self { - // derive independent seed using same mixing primitive - Self(splitmix64(self.0 ^ discriminator.wrapping_mul(GAMMA))) - } - - pub(crate) fn rng(self) -> DstRng { - DstRng { - state: splitmix64(self.0), - } - } -} - -/// Small deterministic RNG for simulator code. -#[derive(Clone, Debug)] -pub(crate) struct DstRng { - state: u64, -} - -impl DstRng { - pub(crate) fn next_u64(&mut self) -> u64 { - // advance state, then reuse splitmix64 mixing - self.state = self.state.wrapping_add(GAMMA); - splitmix64(self.state) - } - - pub(crate) fn index(&mut self, len: usize) -> usize { - assert!(len > 0, "len must be non-zero"); - (self.next_u64() as usize) % len - } -} - -// constants reused everywhere -const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; - -/// Reference: https://rosettacode.org/wiki/Pseudo-random_numbers/Splitmix64 -fn splitmix64(mut x: u64) -> u64 { - x = x.wrapping_add(GAMMA); - x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); - x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); - x ^ (x >> 31) -} diff --git a/crates/dst/src/sim/commitlog.rs b/crates/dst/src/sim/commitlog.rs index a3891571732..7fdd83618fc 100644 --- a/crates/dst/src/sim/commitlog.rs +++ b/crates/dst/src/sim/commitlog.rs @@ -12,12 +12,9 @@ use spacetimedb_commitlog::{ segment::{FileLike, Header}, }; -use crate::{ - seed::DstSeed, - sim::storage_faults::{ - is_injected_fault_text, ShortIoKind, StorageFaultConfig, StorageFaultController, StorageFaultDomain, - StorageFaultKind, StorageFaultSummary, - }, +use crate::sim::storage_faults::{ + is_injected_fault_text, ShortIoKind, 
StorageFaultConfig, StorageFaultController, StorageFaultDomain, + StorageFaultKind, StorageFaultSummary, }; pub(crate) type CommitlogFaultConfig = StorageFaultConfig; @@ -41,10 +38,10 @@ pub(crate) struct FaultableRepo { } impl FaultableRepo { - pub(crate) fn new(inner: R, config: CommitlogFaultConfig, seed: DstSeed) -> Self { + pub(crate) fn new(inner: R, config: CommitlogFaultConfig) -> Self { Self { inner, - faults: StorageFaultController::new(config, StorageFaultDomain::Disk, seed), + faults: StorageFaultController::new(config, StorageFaultDomain::Disk), } } @@ -63,7 +60,7 @@ impl FaultableRepo { impl fmt::Display for FaultableRepo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}+faultable({})", self.inner, self.faults.summary().profile) + write!(f, "{}+faultable({:?})", self.inner, self.faults.summary().profile) } } @@ -73,6 +70,7 @@ impl Repo for FaultableRepo { fn create_segment(&self, offset: u64, header: Header) -> io::Result { self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Open)?; self.inner .create_segment(offset, header) @@ -89,6 +87,7 @@ impl Repo for FaultableRepo { fn open_segment_writer(&self, offset: u64) -> io::Result { self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Open)?; self.inner .open_segment_writer(offset) @@ -101,36 +100,42 @@ impl Repo for FaultableRepo { fn remove_segment(&self, offset: u64) -> io::Result<()> { self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.remove_segment(offset) } fn compress_segment_with(&self, offset: u64, f: impl CompressOnce) -> io::Result { self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.compress_segment_with(offset, f) } fn 
existing_offsets(&self) -> io::Result> { self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.existing_offsets() } fn create_offset_index(&self, offset: TxOffset, cap: u64) -> io::Result { self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.create_offset_index(offset, cap) } fn remove_offset_index(&self, offset: TxOffset) -> io::Result<()> { self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.remove_offset_index(offset) } fn get_offset_index(&self, offset: TxOffset) -> io::Result { self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.get_offset_index(offset) } @@ -161,13 +166,22 @@ impl Read for FaultableSegment { impl Write for FaultableSegment { fn write(&mut self, buf: &[u8]) -> io::Result { self.faults.maybe_latency(); + self.faults.check_pending_error(StorageFaultKind::Write)?; + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Write)?; + let is_partial = self.faults.sample_partial_failure(); let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Write); - self.inner.write(&buf[..len]) + let n = self.inner.write(&buf[..len])?; + if is_partial && n > 0 { + self.faults.arm_pending_error(); + } + Ok(n) } fn flush(&mut self) -> io::Result<()> { self.faults.maybe_latency(); + self.faults.check_pending_error(StorageFaultKind::Flush)?; + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Flush)?; self.inner.flush() } @@ -191,12 +205,16 @@ impl SegmentLen for FaultableSegment { impl FileLike for FaultableSegment { fn fsync(&mut self) -> io::Result<()> { self.faults.maybe_latency(); + 
self.faults.check_pending_error(StorageFaultKind::Fsync)?; + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Fsync)?; self.inner.fsync() } fn ftruncate(&mut self, tx_offset: u64, size: u64) -> io::Result<()> { self.faults.maybe_latency(); + self.faults.check_pending_error(StorageFaultKind::Metadata)?; + self.faults.maybe_error(StorageFaultKind::NoSpace)?; self.faults.maybe_error(StorageFaultKind::Metadata)?; self.inner.ftruncate(tx_offset, size) } @@ -261,14 +279,13 @@ impl SegmentReader for FaultableReader { mod tests { use std::io::{BufRead, Cursor}; - use crate::config::CommitlogFaultProfile; + use crate::{config::CommitlogFaultProfile, sim}; use super::*; fn always_short_read_config() -> CommitlogFaultConfig { CommitlogFaultConfig { profile: CommitlogFaultProfile::Default, - enabled: true, latency_prob: 0.0, long_latency_prob: 0.0, short_io_prob: 1.0, @@ -279,16 +296,22 @@ mod tests { open_error_prob: 0.0, metadata_error_prob: 0.0, max_short_io_divisor: 2, + no_space_prob: 0.0, + partial_failure_prob: 0.0, } } #[test] fn buf_read_path_applies_short_read_faults() { - let faults = StorageFaultController::new(always_short_read_config(), StorageFaultDomain::Disk, DstSeed(55)); - faults.enable(); - let mut reader = FaultableReader::new(Cursor::new(vec![1, 2, 3, 4]), faults.clone()); - - assert_eq!(reader.fill_buf().unwrap(), &[1, 2]); - assert_eq!(faults.summary().short_read, 1); + let mut runtime = sim::Runtime::new(55).unwrap(); + let handle = runtime.handle(); + handle.enable_buggify(); + runtime.block_on(async { + let faults = StorageFaultController::new(always_short_read_config(), StorageFaultDomain::Disk); + let mut reader = FaultableReader::new(Cursor::new(vec![1, 2, 3, 4]), faults.clone()); + + assert_eq!(reader.fill_buf().unwrap(), &[1, 2]); + assert_eq!(faults.summary().short_read, 1); + }); } } diff --git a/crates/dst/src/sim/mod.rs b/crates/dst/src/sim/mod.rs index 6b78fc542b0..51cea430fc6 100644 --- 
a/crates/dst/src/sim/mod.rs +++ b/crates/dst/src/sim/mod.rs @@ -7,14 +7,11 @@ pub(crate) mod commitlog; pub(crate) mod snapshot; pub(crate) mod storage_faults; -pub mod time; use std::{cell::RefCell, future::Future, time::Duration}; pub use spacetimedb_runtime::sim::{yield_now, Handle, JoinHandle, Node, NodeBuilder, NodeId, Rng}; -use crate::seed::DstSeed; - thread_local! { static CURRENT_HANDLE: RefCell> = const { RefCell::new(None) }; } @@ -40,15 +37,28 @@ pub(crate) fn current_handle() -> Option { CURRENT_HANDLE.with(|slot| slot.borrow().clone()) } +const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; + +fn splitmix64(mut x: u64) -> u64 { + x = x.wrapping_add(GAMMA); + x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + x ^ (x >> 31) +} + +pub(crate) fn fork_seed(seed: u64, discriminator: u64) -> u64 { + splitmix64(seed ^ discriminator.wrapping_mul(GAMMA)) +} + /// DST-facing wrapper that keeps the top-level seed type local to this crate. 
pub struct Runtime { inner: spacetimedb_runtime::sim::Runtime, } impl Runtime { - pub fn new(seed: DstSeed) -> anyhow::Result { + pub fn new(seed: u64) -> anyhow::Result { Ok(Self { - inner: spacetimedb_runtime::sim::Runtime::new(seed.0), + inner: spacetimedb_runtime::sim::Runtime::new(seed), }) } @@ -85,28 +95,24 @@ impl Runtime { self.inner.spawn_on(node, future) } - pub fn check_determinism(seed: DstSeed, make_future: fn() -> F) -> F::Output + pub fn check_determinism(seed: u64, make_future: fn() -> F) -> F::Output where F: Future + 'static, F::Output: Send + 'static, { - spacetimedb_runtime::sim_std::check_determinism(seed.0, make_future) + spacetimedb_runtime::sim_std::check_determinism(seed, make_future) } - pub fn check_determinism_with(seed: DstSeed, make_future: M) -> F::Output + pub fn check_determinism_with(seed: u64, make_future: M) -> F::Output where M: Fn() -> F + Clone + Send + 'static, F: Future + 'static, F::Output: Send + 'static, { - spacetimedb_runtime::sim_std::check_determinism(seed.0, make_future) + spacetimedb_runtime::sim_std::check_determinism(seed, make_future) } } - -pub(crate) fn advance_time(duration: Duration) { - time::advance(duration); -} - -pub(crate) fn decision_source(seed: DstSeed) -> Rng { - Rng::new(seed.0) +#[allow(dead_code)] +pub(crate) fn decision_source(seed: u64) -> Rng { + Rng::new(seed) } diff --git a/crates/dst/src/sim/snapshot.rs b/crates/dst/src/sim/snapshot.rs index e3773b07e36..13c0e3a43c3 100644 --- a/crates/dst/src/sim/snapshot.rs +++ b/crates/dst/src/sim/snapshot.rs @@ -14,12 +14,9 @@ use spacetimedb_snapshot::{ }; use spacetimedb_table::{blob_store::BlobStore, page_pool::PagePool, table::Table}; -use crate::{ - seed::DstSeed, - sim::storage_faults::{ - is_injected_fault_text, StorageFaultConfig, StorageFaultController, StorageFaultDomain, StorageFaultKind, - StorageFaultSummary, - }, +use crate::sim::storage_faults::{ + is_injected_fault_text, StorageFaultConfig, StorageFaultController, StorageFaultDomain, 
StorageFaultKind, + StorageFaultSummary, }; pub(crate) type SnapshotFaultConfig = StorageFaultConfig; @@ -52,10 +49,10 @@ pub(crate) struct BuggifiedSnapshotRepo { } impl BuggifiedSnapshotRepo { - pub(crate) fn new(config: SnapshotFaultConfig, seed: DstSeed) -> anyhow::Result { + pub(crate) fn new(config: SnapshotFaultConfig) -> anyhow::Result { Ok(Self { repo: Arc::new(MemorySnapshotRepository::new(Identity::ZERO, 0)), - faults: StorageFaultController::new(config, StorageFaultDomain::Snapshot, seed), + faults: StorageFaultController::new(config, StorageFaultDomain::Snapshot), }) } @@ -128,6 +125,9 @@ impl SnapshotStore for BuggifiedSnapshotRepo { tx_offset: TxOffset, ) -> Result { self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::NoSpace) + .map_err(SnapshotError::Io)?; self.faults .maybe_error(StorageFaultKind::Open) .map_err(SnapshotError::Io)?; @@ -156,6 +156,9 @@ impl SnapshotStore for BuggifiedSnapshotRepo { fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError> { self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::NoSpace) + .map_err(SnapshotError::Io)?; self.faults .maybe_error(StorageFaultKind::Metadata) .map_err(SnapshotError::Io)?; @@ -164,6 +167,9 @@ impl SnapshotStore for BuggifiedSnapshotRepo { fn latest_snapshot(&self) -> Result, SnapshotError> { self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::NoSpace) + .map_err(SnapshotError::Io)?; self.faults .maybe_error(StorageFaultKind::Metadata) .map_err(SnapshotError::Io)?; @@ -172,6 +178,9 @@ impl SnapshotStore for BuggifiedSnapshotRepo { fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::NoSpace) + .map_err(SnapshotError::Io)?; self.faults .maybe_error(StorageFaultKind::Metadata) .map_err(SnapshotError::Io)?; @@ -180,6 +189,9 @@ impl SnapshotStore for BuggifiedSnapshotRepo { fn 
invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::NoSpace) + .map_err(SnapshotError::Io)?; self.faults .maybe_error(StorageFaultKind::Metadata) .map_err(SnapshotError::Io)?; @@ -217,7 +229,7 @@ impl SnapshotRepo for BuggifiedSnapshotRepo { #[cfg(test)] mod tests { - use crate::{config::CommitlogFaultProfile, seed::DstSeed}; + use crate::{config::CommitlogFaultProfile, sim}; use super::*; @@ -227,7 +239,6 @@ mod tests { fn always_metadata_error() -> SnapshotFaultConfig { SnapshotFaultConfig { - enabled: true, metadata_error_prob: 1.0, ..SnapshotFaultConfig::for_profile(CommitlogFaultProfile::Default) } @@ -235,33 +246,42 @@ mod tests { #[test] fn repo_without_snapshots_is_not_used_for_restore() { - let repo = BuggifiedSnapshotRepo::new(no_faults(), DstSeed(41)).unwrap(); + let mut runtime = sim::Runtime::new(42).unwrap(); + runtime.block_on(async { + let repo = BuggifiedSnapshotRepo::new(no_faults()).unwrap(); - assert!(repo.repo_for_restore(Some(0)).unwrap().store.is_none()); + assert!(repo.repo_for_restore(Some(0)).unwrap().store.is_none()); + }) } #[test] fn injected_metadata_error_is_counted_and_recognizable() { - let repo = BuggifiedSnapshotRepo::new(always_metadata_error(), DstSeed(42)).unwrap(); - repo.enable_faults(); - - let err = match repo.repo_for_restore(Some(0)) { - Ok(_) => panic!("expected injected snapshot metadata error"), - Err(err) => err, - }; - - assert!(is_injected_snapshot_error_text(&err)); - assert_eq!(repo.fault_summary().metadata_error, 1); + let mut runtime = sim::Runtime::new(42).unwrap(); + runtime.block_on(async { + let repo = BuggifiedSnapshotRepo::new(always_metadata_error()).unwrap(); + repo.enable_faults(); + + let err = match repo.repo_for_restore(Some(0)) { + Ok(_) => panic!("expected injected snapshot metadata error"), + Err(err) => err, + }; + + assert!(is_injected_snapshot_error_text(&err)); + 
assert_eq!(repo.fault_summary().metadata_error, 1); + }) } #[test] fn suspended_faults_allow_restore_probe() { - let repo = BuggifiedSnapshotRepo::new(always_metadata_error(), DstSeed(43)).unwrap(); - repo.enable_faults(); + let mut runtime = sim::Runtime::new(42).unwrap(); + runtime.block_on(async { + let repo = BuggifiedSnapshotRepo::new(always_metadata_error()).unwrap(); + repo.enable_faults(); - let restore = repo.with_faults_suspended(|| repo.repo_for_restore(Some(0))); + let restore = repo.with_faults_suspended(|| repo.repo_for_restore(Some(0))); - assert!(restore.unwrap().store.is_none()); - assert_eq!(repo.fault_summary().metadata_error, 0); + assert!(restore.unwrap().store.is_none()); + assert_eq!(repo.fault_summary().metadata_error, 0); + }) } } diff --git a/crates/dst/src/sim/storage_faults.rs b/crates/dst/src/sim/storage_faults.rs index 65c89dbafe3..a1c59e5ca71 100644 --- a/crates/dst/src/sim/storage_faults.rs +++ b/crates/dst/src/sim/storage_faults.rs @@ -1,4 +1,7 @@ //! Shared storage fault-injection primitives for DST simulation helpers. +//! +//! Fault decisions use [`spacetimedb_runtime::sim::Handle::buggify_with_prob`] +//! so they are gated by the runtime's centralized buggify flag. 
use std::{ io, @@ -9,7 +12,7 @@ use std::{ time::Duration, }; -use crate::{config::CommitlogFaultProfile, seed::DstSeed, sim}; +use crate::config::CommitlogFaultProfile; const INJECTED_ERROR_PREFIX: &str = "dst injected "; @@ -21,7 +24,6 @@ pub(crate) fn is_injected_fault_text(domain: StorageFaultDomain, text: &str) -> #[derive(Clone, Copy, Debug)] pub(crate) struct StorageFaultConfig { pub(crate) profile: CommitlogFaultProfile, - pub(crate) enabled: bool, pub(crate) latency_prob: f64, pub(crate) long_latency_prob: f64, pub(crate) short_io_prob: f64, @@ -32,6 +34,8 @@ pub(crate) struct StorageFaultConfig { pub(crate) open_error_prob: f64, pub(crate) metadata_error_prob: f64, pub(crate) max_short_io_divisor: usize, + pub(crate) no_space_prob: f64, + pub(crate) partial_failure_prob: f64, } impl StorageFaultConfig { @@ -39,7 +43,6 @@ impl StorageFaultConfig { match profile { CommitlogFaultProfile::Off => Self { profile, - enabled: false, latency_prob: 0.0, long_latency_prob: 0.0, short_io_prob: 0.0, @@ -50,59 +53,59 @@ impl StorageFaultConfig { open_error_prob: 0.0, metadata_error_prob: 0.0, max_short_io_divisor: 2, + no_space_prob: 0.0, + partial_failure_prob: 0.0, }, + // Realistic rare faults: ~1 in 1000 latency, ~1 in 10000 short I/O / errors. CommitlogFaultProfile::Light => Self { profile, - enabled: true, - latency_prob: 0.20, - long_latency_prob: 0.04, - short_io_prob: 0.03, - read_error_prob: 0.0, - write_error_prob: 0.0, - flush_error_prob: 0.0, - fsync_error_prob: 0.0, - open_error_prob: 0.0, - metadata_error_prob: 0.0, + latency_prob: 0.001, + long_latency_prob: 0.0001, + short_io_prob: 0.0001, + read_error_prob: 0.0001, + write_error_prob: 0.0001, + flush_error_prob: 0.0001, + fsync_error_prob: 0.0001, + open_error_prob: 0.0001, + metadata_error_prob: 0.0001, max_short_io_divisor: 2, + no_space_prob: 0.0001, + partial_failure_prob: 0.0001, }, + // Moderate rare faults: ~1 in 500 latency, ~1 in 5000 short I/O / errors. 
CommitlogFaultProfile::Default => Self { profile, - enabled: true, - latency_prob: 0.35, - long_latency_prob: 0.08, - short_io_prob: 0.08, - read_error_prob: 0.0, - write_error_prob: 0.0, - flush_error_prob: 0.0, - fsync_error_prob: 0.0, - open_error_prob: 0.0, - metadata_error_prob: 0.0, + latency_prob: 0.002, + long_latency_prob: 0.0002, + short_io_prob: 0.0002, + read_error_prob: 0.0002, + write_error_prob: 0.0002, + flush_error_prob: 0.0002, + fsync_error_prob: 0.0002, + open_error_prob: 0.0002, + metadata_error_prob: 0.0002, max_short_io_divisor: 2, + no_space_prob: 0.0002, + partial_failure_prob: 0.0002, }, + // Stress test: ~1 in 10 operations see a fault. CommitlogFaultProfile::Aggressive => Self { profile, - enabled: true, - latency_prob: 0.65, - long_latency_prob: 0.18, - short_io_prob: 0.20, - // Current profile-driven runs stay with latency and short I/O. - // Error hooks are available for targeted tests once targets can - // classify transient storage failures instead of treating them - // as harness errors. 
- read_error_prob: 0.0, - write_error_prob: 0.0, - flush_error_prob: 0.0, - fsync_error_prob: 0.0, - open_error_prob: 0.0, - metadata_error_prob: 0.0, - max_short_io_divisor: 4, + latency_prob: 0.10, + long_latency_prob: 0.02, + short_io_prob: 0.02, + read_error_prob: 0.01, + write_error_prob: 0.01, + flush_error_prob: 0.01, + fsync_error_prob: 0.01, + open_error_prob: 0.01, + metadata_error_prob: 0.01, + max_short_io_divisor: 2, + no_space_prob: 0.01, + partial_failure_prob: 0.01, }, } } - - pub(crate) fn enabled(&self) -> bool { - self.enabled - } } #[derive(Clone, Debug, Default, Eq, PartialEq)] @@ -117,6 +120,8 @@ pub(crate) struct StorageFaultSummary { pub(crate) fsync_error: usize, pub(crate) open_error: usize, pub(crate) metadata_error: usize, + pub(crate) no_space: usize, + pub(crate) partial_failure: usize, } #[derive(Clone, Copy, Debug)] @@ -139,27 +144,25 @@ pub(crate) struct StorageFaultController { config: StorageFaultConfig, domain: StorageFaultDomain, counters: Arc, - decisions: Arc, - time: Option, - armed: Arc, + handle: Option, suspended: Arc, } impl StorageFaultController { - pub(crate) fn new(config: StorageFaultConfig, domain: StorageFaultDomain, seed: DstSeed) -> Self { + pub(crate) fn new(config: StorageFaultConfig, domain: StorageFaultDomain) -> Self { Self { config, domain, counters: Arc::default(), - decisions: Arc::new(sim::decision_source(seed)), - time: sim::time::try_current_handle(), - armed: Arc::new(AtomicBool::new(false)), - suspended: Arc::default(), + handle: crate::sim::current_handle(), + suspended: Arc::new(AtomicUsize::new(0)), } } pub(crate) fn enable(&self) { - self.armed.store(true, Ordering::Relaxed); + if let Some(handle) = &self.handle { + handle.enable_buggify(); + } } pub(crate) fn with_suspended(&self, f: impl FnOnce() -> T) -> T { @@ -171,29 +174,51 @@ impl StorageFaultController { } pub(crate) fn maybe_latency(&self) { - if self.sample(self.config.latency_prob) { + if self.sample_latency(self.config.latency_prob) { 
self.counters.latency.fetch_add(1, Ordering::Relaxed); - let latency = if self.sample(self.config.long_latency_prob) { + let latency = if self.sample_latency(self.config.long_latency_prob) { Duration::from_millis(25) } else { Duration::from_millis(1) }; - if let Some(time) = &self.time { - time.advance(latency); - } else { - sim::advance_time(latency); + if let Some(handle) = &self.handle { + handle.advance(latency); } } } pub(crate) fn maybe_error(&self, kind: StorageFaultKind) -> io::Result<()> { - if self.sample(kind.probability(&self.config)) { + let prob = kind.probability(&self.config); + if self.sample(prob) { + kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed); + return Err(io::Error::new(kind.error_kind(), kind.message(self.domain))); + } + Ok(()) + } + + pub(crate) fn check_pending_error(&self, kind: StorageFaultKind) -> io::Result<()> { + if self.counters.pending_error.swap(false, Ordering::Relaxed) { kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed); - return Err(io::Error::other(kind.message(self.domain))); + self.counters.partial_failure.fetch_add(1, Ordering::Relaxed); + return Err(io::Error::new(kind.error_kind(), kind.message(self.domain))); } Ok(()) } + pub(crate) fn arm_pending_error(&self) { + self.counters.pending_error.store(true, Ordering::Relaxed); + } + + pub(crate) fn sample_partial_failure(&self) -> bool { + if !self.active() || self.config.partial_failure_prob <= 0.0 { + return false; + } + match &self.handle { + Some(handle) => handle.buggify_with_prob(self.config.partial_failure_prob), + None => false, + } + } + pub(crate) fn maybe_short_len(&self, len: usize, kind: ShortIoKind) -> usize { if len <= 1 { return len; @@ -201,7 +226,6 @@ impl StorageFaultController { if !self.sample(self.config.short_io_prob) { return len; } - kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed); let divisor = self.config.max_short_io_divisor.max(2); (len / divisor).max(1) @@ -219,19 +243,33 @@ impl StorageFaultController { 
fsync_error: self.counters.fsync_error.load(Ordering::Relaxed) as usize, open_error: self.counters.open_error.load(Ordering::Relaxed) as usize, metadata_error: self.counters.metadata_error.load(Ordering::Relaxed) as usize, + no_space: self.counters.no_space.load(Ordering::Relaxed) as usize, + partial_failure: self.counters.partial_failure.load(Ordering::Relaxed) as usize, } } fn active(&self) -> bool { - self.config.enabled() && self.armed.load(Ordering::Relaxed) && self.suspended.load(Ordering::Relaxed) == 0 + self.suspended.load(Ordering::Relaxed) == 0 } fn sample(&self, probability: f64) -> bool { - if !self.active() || probability <= 0.0 { + if probability <= 0.0 || !self.active() { return false; } + match &self.handle { + Some(handle) => handle.buggify_with_prob(probability), + None => false, + } + } - self.decisions.sample_probability(probability) + fn sample_latency(&self, probability: f64) -> bool { + if probability <= 0.0 { + return false; + } + match &self.handle { + Some(handle) => handle.buggify_with_prob(probability), + None => false, + } } } @@ -256,6 +294,9 @@ struct FaultCounters { fsync_error: AtomicU64, open_error: AtomicU64, metadata_error: AtomicU64, + no_space: AtomicU64, + partial_failure: AtomicU64, + pending_error: AtomicBool, } #[derive(Clone, Copy)] @@ -281,6 +322,7 @@ pub(crate) enum StorageFaultKind { Fsync, Open, Metadata, + NoSpace, } impl StorageFaultKind { @@ -292,6 +334,7 @@ impl StorageFaultKind { Self::Fsync => config.fsync_error_prob, Self::Open => config.open_error_prob, Self::Metadata => config.metadata_error_prob, + Self::NoSpace => config.no_space_prob, } } @@ -303,18 +346,27 @@ impl StorageFaultKind { Self::Fsync => &counters.fsync_error, Self::Open => &counters.open_error, Self::Metadata => &counters.metadata_error, + Self::NoSpace => &counters.no_space, + } + } + + fn error_kind(self) -> io::ErrorKind { + match self { + Self::NoSpace => io::ErrorKind::StorageFull, + _ => io::ErrorKind::Other, } } fn message(self, domain: 
StorageFaultDomain) -> String { - let action = match self { - Self::Read => "read", - Self::Write => "write", - Self::Flush => "flush", - Self::Fsync => "fsync", - Self::Open => "open", - Self::Metadata => "metadata", - }; - format!("{INJECTED_ERROR_PREFIX}{} {action} error", domain.label()) + let label = domain.label(); + match self { + Self::Read => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"), + Self::Write => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"), + Self::Flush => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"), + Self::Fsync => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"), + Self::Open => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"), + Self::Metadata => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"), + Self::NoSpace => format!("{INJECTED_ERROR_PREFIX}{label} no space left on device"), + } } } diff --git a/crates/dst/src/sim/time.rs b/crates/dst/src/sim/time.rs index f55d6378658..bdeae0fbb58 100644 --- a/crates/dst/src/sim/time.rs +++ b/crates/dst/src/sim/time.rs @@ -36,11 +36,11 @@ mod tests { time::Duration, }; - use crate::{seed::DstSeed, sim}; + use crate::sim; #[test] fn sleep_fast_forwards_virtual_time() { - let mut runtime = sim::Runtime::new(DstSeed(101)).unwrap(); + let mut runtime = sim::Runtime::new(101).unwrap(); runtime.block_on(async { assert_eq!(super::now(), Duration::ZERO); @@ -51,7 +51,7 @@ mod tests { #[test] fn shorter_timer_wakes_first() { - let mut runtime = sim::Runtime::new(DstSeed(102)).unwrap(); + let mut runtime = sim::Runtime::new(102).unwrap(); let handle = runtime.handle(); let order = Arc::new(Mutex::new(Vec::new())); @@ -81,7 +81,7 @@ mod tests { #[test] fn explicit_advance_moves_virtual_time() { - let mut runtime = sim::Runtime::new(DstSeed(103)).unwrap(); + let mut runtime = sim::Runtime::new(103).unwrap(); runtime.block_on(async { super::advance(Duration::from_millis(7)); @@ -91,7 +91,7 @@ mod tests { #[test] fn 
timeout_returns_future_output_before_deadline() { - let mut runtime = sim::Runtime::new(DstSeed(104)).unwrap(); + let mut runtime = sim::Runtime::new(104).unwrap(); let output = runtime.block_on(async { super::timeout(Duration::from_millis(10), async { @@ -107,7 +107,7 @@ mod tests { #[test] fn timeout_expires_at_virtual_deadline() { - let mut runtime = sim::Runtime::new(DstSeed(105)).unwrap(); + let mut runtime = sim::Runtime::new(105).unwrap(); let output = runtime.block_on(async { super::timeout(Duration::from_millis(4), async { diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index 5a387625536..1a00c77a937 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -2,60 +2,39 @@ use std::{future::Future, pin::Pin}; -use crate::{config::RunConfig, seed::DstSeed}; +use crate::{config::RunConfig, workload::table_ops::TableScenarioId}; /// Descriptor contract: CLI talks to this, not per-target ad hoc handlers. pub trait TargetDescriptor { const NAME: &'static str; type Scenario; - fn prepare(_seed: DstSeed, _scenario: &Self::Scenario, _config: &RunConfig) -> anyhow::Result<()> { + fn prepare(_seed: u64, _scenario: &Self::Scenario, _config: &RunConfig) -> anyhow::Result<()> { Ok(()) } - fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture; + fn run_streaming(seed: u64, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture; } pub type TargetRunFuture = Pin>>>; -pub struct RelationalDbConcurrentDescriptor; +pub struct RelationalDbCommitlogDescriptor; -impl TargetDescriptor for RelationalDbConcurrentDescriptor { - const NAME: &'static str = "relational_db_concurrent"; - type Scenario = (); +impl TargetDescriptor for RelationalDbCommitlogDescriptor { + const NAME: &'static str = "relational-db-commitlog"; + type Scenario = TableScenarioId; - fn run_streaming(seed: DstSeed, _scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture { + 
fn run_streaming(seed: u64, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture { Box::pin(async move { - let outcome = crate::targets::relational_db_concurrent::run_generated_with_config(seed, config).await?; - Ok(format_relational_db_concurrent_outcome(Self::NAME, seed, &outcome)) + let outcome = + crate::targets::relational_db_commitlog::run_generated_with_config_and_scenario(seed, scenario, config) + .await?; + Ok(format!( + "ok target={} seed={} steps={}", + Self::NAME, + seed, + outcome.final_row_counts.iter().sum::(), + )) }) } } - -fn format_relational_db_concurrent_outcome( - target: &str, - seed: DstSeed, - outcome: &crate::targets::relational_db_concurrent::RelationalDbConcurrentOutcome, -) -> String { - format!( - concat!( - "ok target={} seed={} rounds={}\n", - "\n", - "clients={} events={} reads={}\n", - "transactions: committed={} write_conflicts={} writer_conflicts={} reader_conflicts={}\n", - "rows: final={} expected={}" - ), - target, - seed.0, - outcome.rounds, - outcome.clients, - outcome.events, - outcome.reads, - outcome.committed, - outcome.write_conflicts, - outcome.writer_conflicts, - outcome.reader_conflicts, - outcome.final_rows.len(), - outcome.expected_rows.len(), - ) -} diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs index fba30fb371f..51a483d73a2 100644 --- a/crates/dst/src/targets/mod.rs +++ b/crates/dst/src/targets/mod.rs @@ -2,4 +2,3 @@ pub mod descriptor; pub mod relational_db_commitlog; -pub mod relational_db_concurrent; diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 2c58b32a287..5a116a6e3aa 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -1,306 +1,159 @@ -//! RelationalDB DST target with mocked commitlog file chaos and replay checks. +//! Simple RelationalDB DST target — table operations only. 
-use std::{cell::Cell, collections::BTreeMap, io, num::NonZeroU64, ops::Bound, sync::Arc, time::Duration}; +use std::ops::Bound; +use std::sync::Arc; -use spacetimedb_commitlog::repo::{Memory as MemoryCommitlogRepo, SizeOnDisk}; +use spacetimedb_commitlog::repo::mem::Memory; use spacetimedb_core::{ - db::{ - relational_db::{MutTx as RelMutTx, Persistence, RelationalDB, SnapshotWorker, Tx as RelTx}, - snapshot, - }, - error::{DBError, DatastoreError, IndexError}, + db::persistence::{DiskSizeFn, Persistence}, + db::relational_db::{MutTx as RelMutTx, RelationalDB, Tx as RelTx}, + error::DBError, messages::control_db::HostType, }; -use spacetimedb_datastore::{ - execution_context::Workload, - traits::{IsolationLevel, Program}, -}; -use spacetimedb_durability::{Durability, EmptyHistory, Local}; +use spacetimedb_datastore::{execution_context::Workload, traits::IsolationLevel}; +use spacetimedb_durability::local::Options as DurabilityOpts; +use spacetimedb_durability::Local as DurabilityLocal; use spacetimedb_lib::{ db::auth::{StAccess, StTableType}, Identity, }; -use spacetimedb_primitives::{SequenceId, TableId}; -use spacetimedb_runtime::Handle; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; +use spacetimedb_primitives::TableId; +use spacetimedb_runtime::Handle as RuntimeHandle; +use spacetimedb_sats::AlgebraicValue; use spacetimedb_schema::{ def::BTreeAlgorithm, - schema::{ColumnSchema, ConstraintSchema, IndexSchema, SequenceSchema, TableSchema}, + schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, table_name::TableName, }; +use spacetimedb_snapshot::SnapshotStore; use spacetimedb_table::page_pool::PagePool; -use tracing::{debug, info, trace}; +use tracing::{info, trace}; use crate::{ client::SessionId, - config::RunConfig, + config::{CommitlogFaultProfile, RunConfig}, core::{self, TargetEngine}, properties::{ - CommitlogObservation, DynamicMigrationProbe, PropertyRuntime, TableMutation, TableObservation, - TargetPropertyAccess, 
+ PropertyRuntime, TableMutation, TableObservation, TargetPropertyAccess, }, schema::{SchemaPlan, SimRow}, - seed::DstSeed, sim::{ - self, - commitlog::{is_injected_disk_error_text, CommitlogFaultConfig, CommitlogFaultSummary, FaultableRepo}, - snapshot::{is_injected_snapshot_error_text, BuggifiedSnapshotRepo, SnapshotFaultConfig}, + commitlog::{CommitlogFaultConfig, FaultableRepo}, + fork_seed, + snapshot::BuggifiedSnapshotRepo, + storage_faults::StorageFaultConfig, + Rng, }, - workload::{ - commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DiskFaultSummary, DurableReplaySummary}, - commitlog_ops::{InteractionSummary, RuntimeSummary, SchemaSummary, TableOperationSummary, TransactionSummary}, - table_ops::{ - ConnectionWriteState, TableErrorKind, TableInteractionCase, TableOperation, TableScenario, TableScenarioId, - TableWorkloadInteraction, TableWorkloadOutcome, - }, + workload::table_ops::{ + ConnectionWriteState, TableErrorKind, TableOperation, TableScenario, TableScenarioId, TableWorkloadInteraction, + TableWorkloadOutcome, TableWorkloadSource, }, }; -pub type RelationalDbCommitlogOutcome = CommitlogWorkloadOutcome; -type RelationalDbCommitlogSource = crate::workload::commitlog_ops::CommitlogWorkloadSource; -type RelationalDbCommitlogProperties = PropertyRuntime; - -const DURABILITY_WAIT_TIMEOUT: Duration = Duration::from_secs(30); +pub type RelationalDbTableOutcome = TableWorkloadOutcome; pub async fn run_generated_with_config_and_scenario( - seed: DstSeed, + seed: u64, scenario: TableScenarioId, config: RunConfig, -) -> anyhow::Result { - let (source, engine, properties) = build(seed, scenario, &config)?; - let outcome = core::run_streaming(source, engine, properties, config).await?; - info!( - applied_steps = outcome.applied_steps, - durable_commit_count = outcome.durable_commit_count, - replay_table_count = outcome.replay_table_count, - "relational_db_commitlog complete" - ); - Ok(outcome) -} - -fn build( - seed: DstSeed, - scenario: 
TableScenarioId, - config: &RunConfig, -) -> anyhow::Result<( - RelationalDbCommitlogSource, - RelationalDbEngine, - RelationalDbCommitlogProperties, -)> { - build_with_fault_configs( - seed, - scenario, - config, - CommitlogFaultConfig::for_profile(config.commitlog_fault_profile), - SnapshotFaultConfig::for_profile(config.commitlog_fault_profile), - ) -} - -fn build_with_fault_configs( - seed: DstSeed, - scenario: TableScenarioId, - config: &RunConfig, - commitlog_fault_config: CommitlogFaultConfig, - snapshot_fault_config: SnapshotFaultConfig, -) -> anyhow::Result<( - RelationalDbCommitlogSource, - RelationalDbEngine, - RelationalDbCommitlogProperties, -)> { - let mut connection_rng = seed.fork(121).rng(); - let num_connections = connection_rng.index(3) + 1; - let mut schema_rng = seed.fork(122).rng(); - let schema = scenario.generate_schema(&mut schema_rng); - let generator = crate::workload::commitlog_ops::CommitlogWorkloadSource::new( +) -> anyhow::Result { + let num_connections = { + let rng = Rng::new(fork_seed(seed, 121)); + rng.index(3) + 1 + }; + let schema_rng = Rng::new(fork_seed(seed, 122)); + let schema = scenario.generate_schema(&schema_rng); + let source = TableWorkloadSource::new( seed, scenario, schema.clone(), num_connections, config.max_interactions_or_default(usize::MAX), ); - let engine = RelationalDbEngine::new_with_fault_configs( - seed, - &schema, - num_connections, - commitlog_fault_config, - snapshot_fault_config, - )?; - let properties = PropertyRuntime::for_table_workload(scenario, schema.clone(), num_connections); - Ok((generator, engine, properties)) -} - -#[derive(Clone, Debug)] -struct DynamicTableState { - name: String, - version: u32, - table_id: TableId, -} -#[derive(Default)] -struct RunStats { - interactions: InteractionSummary, - table_ops: TableOperationSummary, - transactions: TransactionStats, - runtime: RuntimeStats, -} - -#[derive(Default)] -struct TransactionStats { - explicit_begin: usize, - explicit_commit: usize, - 
explicit_rollback: usize, - auto_commit: usize, - read_tx: Cell, -} - -#[derive(Default)] -struct RuntimeStats { - durability_actors_started: usize, -} - -impl RunStats { - fn record_interaction_requested(&mut self, interaction: &CommitlogInteraction) { - match interaction { - CommitlogInteraction::Table(_) => self.interactions.table += 1, - CommitlogInteraction::CreateDynamicTable { .. } => self.interactions.create_dynamic_table += 1, - CommitlogInteraction::DropDynamicTable { .. } => self.interactions.drop_dynamic_table += 1, - CommitlogInteraction::MigrateDynamicTable { .. } => self.interactions.migrate_dynamic_table += 1, - CommitlogInteraction::CloseReopen => self.interactions.close_reopen_requested += 1, - } - } + let sim_handle = crate::sim::current_handle().expect("must run inside sim Runtime::block_on"); + let rt_handle = RuntimeHandle::simulation(sim_handle.clone()); - fn record_interaction_result(&mut self, interaction: &CommitlogInteraction, observation: &CommitlogObservation) { - if matches!(observation, CommitlogObservation::Skipped) { - self.interactions.skipped += 1; - } - if matches!(interaction, CommitlogInteraction::CloseReopen) { - match observation { - CommitlogObservation::Skipped => self.interactions.close_reopen_skipped += 1, - CommitlogObservation::Applied | CommitlogObservation::DurableReplay(_) => { - self.interactions.close_reopen_applied += 1 - } - _ => {} - } - } - } + // Build faulty commitlog + persistence + let clog_repo = FaultableRepo::new( + Memory::unlimited(), + CommitlogFaultConfig::for_profile(CommitlogFaultProfile::Default), + ); + let local = DurabilityLocal::open_with_repo(clog_repo, rt_handle.clone(), DurabilityOpts::default())?; + let history = local.as_history(); + let durability = Arc::new(local); - fn record_table_operation(&mut self, case: TableInteractionCase) { - match case { - TableInteractionCase::BeginTx => self.table_ops.begin_tx += 1, - TableInteractionCase::CommitTx => self.table_ops.commit_tx += 1, - 
TableInteractionCase::RollbackTx => self.table_ops.rollback_tx += 1, - TableInteractionCase::BeginReadTx => self.table_ops.begin_read_tx += 1, - TableInteractionCase::ReleaseReadTx => self.table_ops.release_read_tx += 1, - TableInteractionCase::BeginTxConflict => self.table_ops.begin_tx_conflict += 1, - TableInteractionCase::WriteConflictInsert => self.table_ops.write_conflict_insert += 1, - TableInteractionCase::Insert => self.table_ops.insert += 1, - TableInteractionCase::Delete => self.table_ops.delete += 1, - TableInteractionCase::ExactDuplicateInsert => self.table_ops.exact_duplicate_insert += 1, - TableInteractionCase::UniqueKeyConflictInsert => self.table_ops.unique_key_conflict_insert += 1, - TableInteractionCase::DeleteMissing => self.table_ops.delete_missing += 1, - TableInteractionCase::BatchInsert => self.table_ops.batch_insert += 1, - TableInteractionCase::BatchDelete => self.table_ops.batch_delete += 1, - TableInteractionCase::Reinsert => self.table_ops.reinsert += 1, - TableInteractionCase::AddColumn => self.table_ops.add_column += 1, - TableInteractionCase::AddIndex => self.table_ops.add_index += 1, - TableInteractionCase::PointLookup => self.table_ops.point_lookup += 1, - TableInteractionCase::PredicateCount => self.table_ops.predicate_count += 1, - TableInteractionCase::RangeScan => self.table_ops.range_scan += 1, - TableInteractionCase::FullScan => self.table_ops.full_scan += 1, - } - } + // Build faulty snapshot store + let snap_repo = Arc::new(BuggifiedSnapshotRepo::new( + StorageFaultConfig::for_profile(CommitlogFaultProfile::Default), + )?) 
as Arc; - fn record_read_tx(&self) { - self.transactions - .read_tx - .set(self.transactions.read_tx.get().saturating_add(1)); - } + // Enable buggify after setup so initial replay is fault-free + sim_handle.enable_buggify(); - fn transaction_summary(&self, durable_commit_count: usize) -> TransactionSummary { - TransactionSummary { - explicit_begin: self.transactions.explicit_begin, - explicit_commit: self.transactions.explicit_commit, - explicit_rollback: self.transactions.explicit_rollback, - auto_commit: self.transactions.auto_commit, - read_tx: self.transactions.read_tx.get(), - durable_commit_count, - } - } - - fn runtime_summary(&self) -> RuntimeSummary { - RuntimeSummary { - known_runtime_tasks_scheduled: self.runtime.durability_actors_started, - durability_actors_started: self.runtime.durability_actors_started, - runtime_alive_tasks: runtime_alive_tasks(), - } - } -} + let persistence = Persistence { + durability, + disk_size: { + use std::io; + use spacetimedb_commitlog::repo::SizeOnDisk; + Arc::new(|| io::Result::Ok(SizeOnDisk { total_bytes: 0, total_blocks: 0 })) as DiskSizeFn + }, + snapshot_store: Some(snap_repo), + snapshots: None, + runtime: rt_handle, + }; -struct ReopenedRelationalDb { - durability: Arc, - db: RelationalDB, - restored_snapshot_offset: Option, - latest_snapshot_offset: Option, - snapshot_worker: SnapshotWorker, + let engine = RelationalDbEngine::new(seed, &schema, num_connections, history, Some(persistence))?; + let properties = PropertyRuntime::for_table_workload(scenario, schema.clone(), num_connections); + let outcome = core::run_streaming(source, engine, properties, config).await?; + info!( + applied_steps = outcome.final_row_counts.iter().sum::(), + "relational_db_table complete" + ); + Ok(outcome) } -/// Engine executing mixed table+lifecycle interactions while recording mocked durable history. 
struct RelationalDbEngine { db: Option, execution: ConnectionWriteState, read_tx_by_connection: Vec>, base_schema: SchemaPlan, base_table_ids: Vec, - dynamic_tables: BTreeMap, step: usize, - last_requested_durable_offset: Option, - last_observed_durable_offset: Option, - last_restored_snapshot_offset: Option, - latest_snapshot_offset: Option, - durability: Arc, - durability_opts: spacetimedb_durability::local::Options, - commitlog_repo: StressCommitlogRepo, - snapshot_repo: StressSnapshotRepo, - snapshot_worker: SnapshotWorker, - stats: RunStats, } impl RelationalDbEngine { - fn new_with_fault_configs( - seed: DstSeed, - schema: &SchemaPlan, - num_connections: usize, - commitlog_fault_config: CommitlogFaultConfig, - snapshot_fault_config: SnapshotFaultConfig, + fn new>( + _seed: u64, schema: &SchemaPlan, num_connections: usize, + history: H, persistence: Option, ) -> anyhow::Result { - let bootstrap = bootstrap_relational_db(seed.fork(700), commitlog_fault_config, snapshot_fault_config)?; - let mut this = Self { - db: Some(bootstrap.db), + let (db, connected_clients) = RelationalDB::open( + Identity::ZERO, + Identity::ZERO, + history, + persistence, + None, + PagePool::new_for_test(), + )?; + assert_eq!(connected_clients.len(), 0); + db.with_auto_commit(Workload::Internal, |tx| { + db.set_initialized(tx, spacetimedb_datastore::traits::Program::empty(HostType::Wasm.into())) + })?; + + let mut engine = Self { + db: Some(db), execution: ConnectionWriteState::new(num_connections), read_tx_by_connection: (0..num_connections).map(|_| None).collect(), base_schema: schema.clone(), base_table_ids: Vec::with_capacity(schema.tables.len()), - dynamic_tables: BTreeMap::new(), step: 0, - last_requested_durable_offset: None, - last_observed_durable_offset: None, - last_restored_snapshot_offset: None, - latest_snapshot_offset: None, - durability: bootstrap.durability, - durability_opts: bootstrap.durability_opts, - commitlog_repo: bootstrap.commitlog_repo, - snapshot_repo: 
bootstrap.snapshot_repo, - snapshot_worker: bootstrap.snapshot_worker, - stats: RunStats { - runtime: RuntimeStats::default(), - ..Default::default() - }, }; - this.install_base_schema().map_err(anyhow::Error::msg)?; - this.refresh_observed_durable_offset(true).map_err(anyhow::Error::msg)?; - this.commitlog_repo.enable_faults(); - this.snapshot_repo.enable_faults(); - Ok(this) + engine.install_base_schema().map_err(anyhow::Error::msg)?; + Ok(engine) + } + + fn db(&self) -> Result<&RelationalDB, String> { + self.db.as_ref().ok_or_else(|| "relational db not initialized".to_string()) } fn install_base_schema(&mut self) -> Result<(), String> { @@ -352,171 +205,25 @@ impl RelationalDbEngine { .map_err(|err| format!("create table '{}' failed: {err}", table.name))?; self.base_table_ids.push(table_id); } - let committed = self + let _ = self .db()? .commit_tx(tx) .map_err(|err| format!("install base schema commit failed: {err}"))?; - self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); Ok(()) } - async fn execute(&mut self, interaction: &CommitlogInteraction) -> Result { + fn execute(&mut self, interaction: &TableWorkloadInteraction) -> Result { self.step = self.step.saturating_add(1); - self.stats.record_interaction_requested(interaction); - let observation = match interaction { - CommitlogInteraction::Table(op) => self.execute_table_op(op).map(CommitlogObservation::Table), - CommitlogInteraction::CreateDynamicTable { conn, slot } => self.create_dynamic_table(*conn, *slot), - CommitlogInteraction::DropDynamicTable { conn, slot } => self.drop_dynamic_table(*conn, *slot), - CommitlogInteraction::MigrateDynamicTable { conn, slot } => self.migrate_dynamic_table(*conn, *slot), - CommitlogInteraction::CloseReopen => self.close_and_reopen().await, - }?; - if !matches!(interaction, CommitlogInteraction::CloseReopen) { - self.wait_for_requested_durability(false).await?; - } - self.stats.record_interaction_result(interaction, &observation); - 
Ok(observation) - } - - async fn close_and_reopen(&mut self) -> Result { - if self.execution.active_writer.is_some() - || self.execution.tx_by_connection.iter().any(|tx| tx.is_some()) - || self.read_tx_by_connection.iter().any(|tx| tx.is_some()) - { - trace!("skip close/reopen while transaction is open"); - return Ok(CommitlogObservation::Skipped); - } - - self.wait_for_requested_durability(true).await?; - // Explicitly drop the current RelationalDB instance before attempting - // to open a new durability+DB pair on the same replica directory. - let old_db = self - .db - .take() - .ok_or_else(|| "close/reopen failed: relational db not initialized".to_string())?; - old_db.shutdown().await; - drop(old_db); - info!("starting in-memory durability"); - - let reopened = self.reopen_from_history_with_fault_retry("close/reopen")?; - - self.durability = reopened.durability; - self.db = Some(reopened.db); - self.last_restored_snapshot_offset = reopened.restored_snapshot_offset; - self.latest_snapshot_offset = reopened.latest_snapshot_offset; - self.snapshot_worker = reopened.snapshot_worker; - self.rebuild_table_handles_after_reopen()?; - self.last_observed_durable_offset = self.durability.durable_tx_offset().last_seen(); - let replay = self.durable_replay_summary()?; - debug!( - base_tables = self.base_table_ids.len(), - dynamic_tables = self.dynamic_tables.len(), - "reopened relational db from durable history" - ); - Ok(CommitlogObservation::DurableReplay(replay)) - } - - fn reopen_from_history_with_fault_retry(&self, context: &'static str) -> Result { - match self.reopen_from_history() { - Ok(reopened) => Ok(reopened), - Err(err) if is_injected_disk_error_text(&err) || is_injected_snapshot_error_text(&err) => { - trace!(error = %err, "retrying {context} with injected storage faults suspended"); - self.commitlog_repo - .with_faults_suspended(|| self.snapshot_repo.with_faults_suspended(|| self.reopen_from_history())) - } - Err(err) => Err(err), - } - } - - fn 
reopen_from_history(&self) -> Result { - let runtime = Handle::tokio_current(); - let durability = Arc::new( - InMemoryCommitlogDurability::open_with_repo( - self.commitlog_repo.clone(), - runtime.clone(), - self.durability_opts, - ) - .map_err(|err| format!("reopen in-memory durability failed: {err}"))?, - ); - let durable_offset = durability.durable_tx_offset().last_seen(); - let snapshot_restore = self.snapshot_repo.repo_for_restore(durable_offset)?; - let snapshot_worker = SnapshotWorker::new( - Arc::new(self.snapshot_repo.clone()), - snapshot::Compression::Disabled, - runtime.clone(), - ); - let persistence = Persistence { - durability: durability.clone(), - disk_size: Arc::new(in_memory_size_on_disk), - snapshot_store: snapshot_restore.store.clone(), - snapshots: Some(snapshot_worker.clone()), - runtime, - }; - let (db, connected_clients) = RelationalDB::open( - Identity::ZERO, - Identity::ZERO, - durability.as_history(), - Some(persistence), - None, - PagePool::new_for_test(), - ) - .map_err(|err| format!("close/reopen failed: {err}"))?; - if !connected_clients.is_empty() { - return Err(format!( - "unexpected connected clients after reopen: {connected_clients:?}" - )); - } - Ok(ReopenedRelationalDb { - durability, - db, - restored_snapshot_offset: snapshot_restore.restored_snapshot_offset, - latest_snapshot_offset: snapshot_restore.latest_snapshot_offset, - snapshot_worker, - }) - } - - fn rebuild_table_handles_after_reopen(&mut self) -> Result<(), String> { - let db = self.db()?; - let tx = db.begin_tx(Workload::ForTests); - self.stats.record_read_tx(); - let schemas = db - .get_all_tables(&tx) - .map_err(|err| format!("list tables after reopen failed: {err}"))?; - let _ = db.release_tx(tx); - - let mut by_name = BTreeMap::new(); - for schema in schemas { - by_name.insert(schema.table_name.to_string(), schema.table_id); - } - - self.base_table_ids.clear(); - for table in &self.base_schema.tables { - let table_id = by_name - .get(&table.name) - .copied() - 
.ok_or_else(|| format!("base table '{}' missing after reopen", table.name))?; - self.base_table_ids.push(table_id); - } - - self.dynamic_tables.retain(|_slot, state| { - if let Some(table_id) = by_name.get(&state.name).copied() { - state.table_id = table_id; - true - } else { - false - } - }); - - Ok(()) + self.execute_table_op(interaction) } fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result { + trace!(step = self.step, op = ?interaction.op, "table interaction"); let observation = self.execute_table_op_inner(&interaction.op)?; - self.stats.record_table_operation(interaction.case); Ok(observation) } fn execute_table_op_inner(&mut self, op: &TableOperation) -> Result { - trace!(step = self.step, ?op, "table interaction"); match op { TableOperation::BeginTx { conn } => self.begin_write_tx(*conn), TableOperation::BeginReadTx { conn } => { @@ -529,7 +236,6 @@ impl RelationalDbEngine { } let tx = self.db()?.begin_tx(Workload::ForTests); self.read_tx_by_connection[conn.as_index()] = Some(tx); - self.stats.record_read_tx(); Ok(TableObservation::Applied) } TableOperation::ReleaseReadTx { conn } => { @@ -545,13 +251,11 @@ impl RelationalDbEngine { let tx = self.execution.tx_by_connection[conn.as_index()] .take() .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; - let committed = self + let _ = self .db()? 
.commit_tx(tx) .map_err(|err| format!("commit interaction failed: {err}"))?; - self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); self.execution.active_writer = None; - self.stats.transactions.explicit_commit += 1; Ok(TableObservation::CommitOrRollback) } TableOperation::RollbackTx { conn } => { @@ -561,7 +265,6 @@ impl RelationalDbEngine { .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; let _ = self.db()?.rollback_mut_tx(tx); self.execution.active_writer = None; - self.stats.transactions.explicit_rollback += 1; Ok(TableObservation::CommitOrRollback) } TableOperation::InsertRows { conn, table, rows } => self.execute_insert_rows(*conn, *table, rows), @@ -572,30 +275,27 @@ impl RelationalDbEngine { column, default, } => { - let table_id = self.with_mut_tx(*conn, |engine, tx| { - let table_id = engine.table_id_for_index(*table)?; - let column_idx = engine.base_schema.tables[*table].columns.len() as u16; - let mut columns = engine.base_schema.tables[*table] - .columns - .iter() - .enumerate() - .map(|(idx, existing)| ColumnSchema::for_test(idx as u16, &existing.name, existing.ty.clone())) - .collect::>(); - columns.push(ColumnSchema::for_test(column_idx, &column.name, column.ty.clone())); + let table_id = self.table_id_for_index(*table)?; + let column_idx = self.base_schema.tables[*table].columns.len() as u16; + let mut columns = self.base_schema.tables[*table] + .columns + .iter() + .enumerate() + .map(|(idx, existing)| ColumnSchema::for_test(idx as u16, &existing.name, existing.ty.clone())) + .collect::>(); + columns.push(ColumnSchema::for_test(column_idx, &column.name, column.ty.clone())); + self.with_mut_tx(*conn, |engine, tx| { let new_table_id = engine .db()? 
- .add_columns_to_table(tx, table_id, columns, vec![default.clone()]) + .add_columns_to_table(tx, table_id, columns.clone(), vec![default.clone()]) .map_err(|err| format!("add column failed: {err}"))?; Ok(new_table_id) })?; - self.base_table_ids[*table] = table_id; - self.base_schema.tables[*table].columns.push(column.clone()); - self.refresh_observed_durable_offset(false)?; Ok(TableObservation::Applied) } TableOperation::AddIndex { conn, table, cols } => { + let table_id = self.table_id_for_index(*table)?; self.with_mut_tx(*conn, |engine, tx| { - let table_id = engine.table_id_for_index(*table)?; let mut schema = IndexSchema::for_test( format!( "{}_dst_added_{}_idx", @@ -614,7 +314,6 @@ impl RelationalDbEngine { if !self.base_schema.tables[*table].extra_indexes.contains(cols) { self.base_schema.tables[*table].extra_indexes.push(cols.clone()); } - self.refresh_observed_durable_offset(false)?; Ok(TableObservation::Applied) } TableOperation::PointLookup { conn, table, id } => { @@ -690,12 +389,13 @@ impl RelationalDbEngine { } self.execution.tx_by_connection[conn.as_index()] = Some(tx); self.execution.active_writer = Some(conn); - self.stats.transactions.explicit_begin += 1; Ok(TableObservation::Applied) } None => { if self.execution.active_writer.is_some() || self.any_open_read_tx() { - Ok(TableObservation::ObservedError(TableErrorKind::WriteConflict)) + Ok(TableObservation::ObservedError( + TableErrorKind::WriteConflict, + )) } else { Err(format!( "connection {conn} failed to begin write transaction without an open conflicting lock" @@ -765,10 +465,7 @@ impl RelationalDbEngine { outcome: Result, TableErrorKind>, String>, ) -> Result { match outcome { - Ok(Ok(mutations)) => { - self.refresh_if_auto_commit(in_tx)?; - Ok(TableObservation::Mutated { conn, mutations, in_tx }) - } + Ok(Ok(mutations)) => Ok(TableObservation::Mutated { conn, mutations, in_tx }), Ok(Err(kind)) => Ok(TableObservation::ObservedError(kind)), Err(err) if is_write_conflict_error(&err) => { 
Ok(TableObservation::ObservedError(TableErrorKind::WriteConflict)) @@ -817,16 +514,11 @@ impl RelationalDbEngine { return Err(err); } }; - let committed = match self.db()?.commit_tx(tx) { - Ok(committed) => committed, - Err(err) => { - self.execution.active_writer = None; - return Err(format!("auto-commit write failed: {err}")); - } - }; - self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); + let _ = self + .db()? + .commit_tx(tx) + .map_err(|err| format!("auto-commit write failed: {err}"))?; self.execution.active_writer = None; - self.stats.transactions.auto_commit += 1; Ok(Ok(value)) } @@ -867,16 +559,11 @@ impl RelationalDbEngine { return Err(err); } }; - let committed = match self.db()?.commit_tx(tx) { - Ok(committed) => committed, - Err(err) => { - self.execution.active_writer = None; - return Err(format!("auto-commit write failed: {err}")); - } - }; - self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); + let _ = self + .db()? + .commit_tx(tx) + .map_err(|err| format!("auto-commit write failed: {err}"))?; self.execution.active_writer = None; - self.stats.transactions.auto_commit += 1; Ok(value) } @@ -887,15 +574,6 @@ impl RelationalDbEngine { row: &SimRow, ) -> Result, String> { let table_id = self.table_id_for_index(table)?; - self.try_insert_row(tx, table_id, row) - } - - fn try_insert_row( - &self, - tx: &mut RelMutTx, - table_id: TableId, - row: &SimRow, - ) -> Result, String> { let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; Ok(match self.db()?.insert(tx, table_id, &bsatn) { Ok((_, row_ref, _)) => Ok(SimRow::from_product_value(row_ref.to_product_value())), @@ -903,198 +581,15 @@ impl RelationalDbEngine { }) } - fn insert_row( - &self, - tx: &mut RelMutTx, - table_id: TableId, - row: &SimRow, - context: impl Into, - ) -> Result { - let context = context.into(); - self.try_insert_row(tx, table_id, row)? 
- .map_err(|err| format!("{context}: {err}")) - } - fn delete_base_row_count(&self, tx: &mut RelMutTx, table: usize, row: &SimRow) -> Result { let table_id = self.table_id_for_index(table)?; Ok(self.db()?.delete_by_rel(tx, table_id, [row.to_product_value()])) } - fn create_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { - if self.execution.active_writer.is_some() || self.any_open_read_tx() { - trace!( - step = self.step, - slot, - "skip create dynamic table while transaction is open" - ); - return Ok(CommitlogObservation::Skipped); - } - let conn = self.normalize_conn(conn); - debug!(step = self.step, conn = %conn, slot, "create dynamic table"); - self.with_mut_tx(conn, |engine, tx| { - if engine.dynamic_tables.contains_key(&slot) { - return Ok(()); - } - let name = dynamic_table_name(slot); - let schema = dynamic_schema(&name, 0); - let table_id = engine - .db()? - .create_table(tx, schema) - .map_err(|err| format!("create dynamic table slot={slot} failed: {err}"))?; - let seed_row = SimRow { - values: vec![AlgebraicValue::I64(0), AlgebraicValue::U64(slot as u64)], - }; - engine.insert_row( - tx, - table_id, - &seed_row, - format!("seed dynamic table auto-inc insert failed for slot={slot}"), - )?; - engine.dynamic_tables.insert( - slot, - DynamicTableState { - name, - version: 0, - table_id, - }, - ); - Ok(()) - })?; - self.refresh_observed_durable_offset(false)?; - Ok(CommitlogObservation::Applied) - } - - fn drop_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { - if self.execution.active_writer.is_some() || self.any_open_read_tx() { - trace!( - step = self.step, - slot, - "skip drop dynamic table while transaction is open" - ); - return Ok(CommitlogObservation::Skipped); - } - let conn = self.normalize_conn(conn); - debug!(step = self.step, conn = %conn, slot, "drop dynamic table"); - self.with_mut_tx(conn, |engine, tx| { - let Some(state) = engine.dynamic_tables.remove(&slot) else { - return Ok(()); - }; - if let Err(err) = 
engine.db()?.drop_table(tx, state.table_id) { - let msg = err.to_string(); - if !msg.contains("not found") { - return Err(format!("drop dynamic table slot={slot} failed: {err}")); - } - } - Ok(()) - })?; - self.refresh_observed_durable_offset(false)?; - Ok(CommitlogObservation::Applied) - } - - fn migrate_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { - if self.execution.active_writer.is_some() || self.any_open_read_tx() { - trace!( - step = self.step, - slot, - "skip migrate dynamic table while transaction is open" - ); - return Ok(CommitlogObservation::Skipped); - } - let conn = self.normalize_conn(conn); - debug!(step = self.step, conn = %conn, slot, "migrate dynamic table"); - let probe = self.with_mut_tx(conn, |engine, tx| { - let Some(state) = engine.dynamic_tables.get(&slot).cloned() else { - return Ok(None); - }; - let to_version = state.version.saturating_add(1); - let new_table_id = engine - .db()? - .add_columns_to_table( - tx, - state.table_id, - dynamic_column_schemas(to_version), - vec![AlgebraicValue::Bool(false)], - ) - .map_err(|err| format!("migrate add_columns_to_table failed for slot={slot}: {err}"))?; - let existing_rows = engine - .db()? - .iter_mut(tx, new_table_id) - .map_err(|err| format!("migrate scan table failed: {err}"))? 
- .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - - let probe_row = dynamic_probe_row(slot, to_version); - let inserted = engine.insert_row( - tx, - new_table_id, - &probe_row, - format!("migrate auto-inc probe failed for slot={slot}"), - )?; - engine.dynamic_tables.insert( - slot, - DynamicTableState { - name: state.name, - version: to_version, - table_id: new_table_id, - }, - ); - Ok(Some(DynamicMigrationProbe { - slot, - from_version: state.version, - to_version, - existing_rows, - inserted_row: inserted, - })) - })?; - self.refresh_observed_durable_offset(false)?; - Ok(probe - .map(CommitlogObservation::DynamicMigrationProbe) - .unwrap_or(CommitlogObservation::Skipped)) - } - - fn normalize_conn(&self, conn: SessionId) -> SessionId { - self.execution.active_writer.unwrap_or(conn) - } - fn any_open_read_tx(&self) -> bool { self.read_tx_by_connection.iter().any(Option::is_some) } - fn refresh_observed_durable_offset(&mut self, forced: bool) -> Result<(), String> { - let durable_offset = self.durability.durable_tx_offset().last_seen(); - if forced || durable_offset != self.last_observed_durable_offset { - self.last_observed_durable_offset = durable_offset; - } - Ok(()) - } - - async fn wait_for_requested_durability(&mut self, forced: bool) -> Result<(), String> { - if let Some(target_offset) = self.last_requested_durable_offset { - let current = self.durability.durable_tx_offset().last_seen(); - if current.is_none_or(|offset| offset < target_offset) { - let mut durable_offset = self.durability.durable_tx_offset(); - sim::time::timeout(DURABILITY_WAIT_TIMEOUT, durable_offset.wait_for(target_offset)) - .await - .map_err(|err| { - format!( - "durability wait for tx offset {target_offset} timed out after {:?}", - err.duration() - ) - })? 
- .map_err(|err| format!("durability wait for tx offset {target_offset} failed: {err}"))?; - } - } else if forced { - sim::yield_now().await; - } - self.refresh_observed_durable_offset(forced) - } - - fn record_committed_offset(&mut self, offset: Option) { - if let Some(offset) = offset { - self.last_requested_durable_offset = Some(offset); - } - } - fn is_in_write_tx(&self, conn: SessionId) -> bool { self.execution .tx_by_connection @@ -1102,13 +597,6 @@ impl RelationalDbEngine { .is_some_and(Option::is_some) } - fn refresh_if_auto_commit(&mut self, in_tx: bool) -> Result<(), String> { - if !in_tx { - self.refresh_observed_durable_offset(false)?; - } - Ok(()) - } - fn table_id_for_index(&self, table: usize) -> Result { self.base_table_ids .get(table) @@ -1119,59 +607,20 @@ impl RelationalDbEngine { fn with_fresh_read_tx(&self, f: impl FnOnce(&RelationalDB, &RelTx) -> Result) -> Result { let db = self.db()?; let tx = db.begin_tx(Workload::ForTests); - self.stats.record_read_tx(); let result = f(db, &tx); let _ = db.release_tx(tx); result } - fn collect_rows_in_fresh_tx(&self, table_id: TableId, context: &'static str) -> Result, String> { + fn collect_rows_by_id(&self, table_id: TableId) -> Result, String> { self.with_fresh_read_tx(|db, tx| { - Ok(db + let mut rows = db .iter(tx, table_id) - .map_err(|err| format!("{context}: {err}"))? + .map_err(|err| format!("scan failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>()) - }) - } - - fn count_rows_in_fresh_tx(&self, table_id: TableId, context: &'static str) -> Result { - self.with_fresh_read_tx(|db, tx| { - Ok(db - .iter(tx, table_id) - .map_err(|err| format!("{context}: {err}"))? 
- .count()) - }) - } - - fn count_by_col_eq_in_fresh_tx( - &self, - table_id: TableId, - col: u16, - value: &AlgebraicValue, - context: &'static str, - ) -> Result { - self.with_fresh_read_tx(|db, tx| { - Ok(db - .iter_by_col_eq(tx, table_id, col, value) - .map_err(|err| format!("{context}: {err}"))? - .count()) - }) - } - - fn range_scan_in_fresh_tx( - &self, - table_id: TableId, - cols: spacetimedb_primitives::ColList, - bounds: (Bound, Bound), - context: &'static str, - ) -> Result, String> { - self.with_fresh_read_tx(|db, tx| { - Ok(db - .iter_by_col_range(tx, table_id, cols, bounds) - .map_err(|err| format!("{context}: {err}"))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>()) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) }) } @@ -1248,7 +697,12 @@ impl RelationalDbEngine { .map_err(|err| format!("read-tx predicate query failed: {err}"))? .count()) } else { - self.count_by_col_eq_in_fresh_tx(table_id, col, value, "predicate query failed") + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter_by_col_eq(tx, table_id, col, value) + .map_err(|err| format!("predicate query failed: {err}"))? + .count()) + }) } } @@ -1261,153 +715,68 @@ impl RelationalDbEngine { upper: Bound, ) -> Result, String> { let table_id = self.table_id_for_index(table)?; - let col_list = cols.iter().copied().collect::(); - let mut rows = if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { - self.db()? - .iter_by_col_range_mut(tx, table_id, col_list, (lower, upper)) + let cols_list = cols.iter().copied().collect::(); + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { + let mut rows = self + .db()? + .iter_by_col_range_mut(tx, table_id, cols_list, (lower, upper)) .map_err(|err| format!("in-tx range scan failed: {err}"))? 
.map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>() + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { - self.db()? - .iter_by_col_range(tx, table_id, col_list, (lower, upper)) + let mut rows = self + .db()? + .iter_by_col_range(tx, table_id, cols_list, (lower, upper)) .map_err(|err| format!("read-tx range scan failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>() + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) } else { - self.range_scan_in_fresh_tx(table_id, col_list, (lower, upper), "range scan failed")? - }; - rows.sort_by(|lhs, rhs| compare_rows_for_range(lhs, rhs, cols)); - Ok(rows) - } - - fn count_rows_for_property(&self, table: usize) -> Result { - let table_id = self.table_id_for_index(table)?; - self.count_rows_in_fresh_tx(table_id, "scan failed") - } - - fn count_by_col_eq_for_property(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { - let table_id = self.table_id_for_index(table)?; - self.count_by_col_eq_in_fresh_tx(table_id, col, value, "predicate query failed") - } - - fn range_scan_for_property( - &self, - table: usize, - cols: &[u16], - lower: Bound, - upper: Bound, - ) -> Result, String> { - let table_id = self.table_id_for_index(table)?; - let cols = cols.iter().copied().collect::(); - self.range_scan_in_fresh_tx(table_id, cols, (lower, upper), "range scan failed") - } - - fn collect_rows_by_id(&self, table_id: TableId) -> Result, String> { - let mut rows = self.collect_rows_in_fresh_tx(table_id, "scan failed")?; - rows.sort_by_key(|row| row.id().unwrap_or_default()); - Ok(rows) - } - - fn durable_replay_summary(&self) -> Result { - Ok(DurableReplaySummary { - durable_offset: self.last_observed_durable_offset, - restored_snapshot_offset: self.last_restored_snapshot_offset, - latest_snapshot_offset: 
self.latest_snapshot_offset, - base_rows: self.collect_base_rows()?, - dynamic_table_count: self.dynamic_tables.len(), - }) - } - - async fn reopen_for_final_replay_check(&mut self) -> Result { - let old_db = self - .db - .take() - .ok_or_else(|| "final replay check failed: relational db not initialized".to_string())?; - old_db.shutdown().await; - drop(old_db); - - let reopened = self.reopen_from_history_with_fault_retry("final replay check")?; - self.durability = reopened.durability; - self.db = Some(reopened.db); - self.last_restored_snapshot_offset = reopened.restored_snapshot_offset; - self.latest_snapshot_offset = reopened.latest_snapshot_offset; - self.rebuild_table_handles_after_reopen()?; - self.last_observed_durable_offset = self.durability.durable_tx_offset().last_seen(); - self.durable_replay_summary() + self.with_fresh_read_tx(|db, tx| { + let mut rows = db + .iter_by_col_range(tx, table_id, cols_list, (lower, upper)) + .map_err(|err| format!("range scan failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + }) + } } +} - async fn collect_outcome(&mut self) -> Result { - self.wait_for_requested_durability(true).await?; - let table = self.collect_table_outcome()?; - let replay = self.reopen_for_final_replay_check().await?; - let durable_commit_count = self - .last_observed_durable_offset - .map(|offset| (offset as usize).saturating_add(1)) - .unwrap_or(0); - let replay_table_count = replay.base_rows.len() + replay.dynamic_table_count; - debug!(durable_commits = durable_commit_count, "replayed durable prefix"); - Ok(RelationalDbCommitlogOutcome { - applied_steps: self.step, - durable_commit_count, - replay_table_count, - schema: schema_summary(&self.base_schema), - interactions: self.stats.interactions.clone(), - table_ops: self.stats.table_ops.clone(), - transactions: self.stats.transaction_summary(durable_commit_count), - runtime: 
self.stats.runtime_summary(), - disk_faults: disk_fault_summary(self.commitlog_repo.fault_summary()), - snapshot_faults: disk_fault_summary(self.snapshot_repo.fault_summary()), - replay, - table, - }) - } +impl TargetEngine for RelationalDbEngine { + type Observation = TableObservation; + type Outcome = TableWorkloadOutcome; + type Error = String; - fn collect_base_rows(&self) -> Result>, String> { - self.base_table_ids - .iter() - .map(|&table_id| self.collect_rows_by_id(table_id)) - .collect() + fn execute_interaction<'a>( + &'a mut self, + interaction: &'a TableWorkloadInteraction, + ) -> impl std::future::Future> + 'a { + async move { self.execute(interaction) } } - fn collect_table_outcome(&self) -> Result { - let mut final_rows = Vec::with_capacity(self.base_table_ids.len()); - let mut final_row_counts = Vec::with_capacity(self.base_table_ids.len()); - - for &table_id in &self.base_table_ids { - let rows = self.collect_rows_by_id(table_id)?; - final_row_counts.push(rows.len() as u64); - final_rows.push(rows); - } - - Ok(TableWorkloadOutcome { - final_row_counts, - final_rows, - }) - } + fn finish(&mut self) {} - fn finish(&mut self) { - for tx in &mut self.execution.tx_by_connection { - if let Some(tx) = tx.take() - && let Some(db) = &self.db - { - let _ = db.rollback_mut_tx(tx); - } - } - for tx in &mut self.read_tx_by_connection { - if let Some(tx) = tx.take() - && let Some(db) = &self.db - { - let _ = db.release_tx(tx); + fn collect_outcome<'a>(&'a mut self) -> impl std::future::Future> + 'a { + async move { + let mut final_rows = Vec::with_capacity(self.base_schema.tables.len()); + let mut final_row_counts = Vec::with_capacity(self.base_schema.tables.len()); + for table in 0..self.base_schema.tables.len() { + let table_id = self.table_id_for_index(table).map_err(anyhow::Error::msg)?; + let rows = self.collect_rows_by_id(table_id).map_err(anyhow::Error::msg)?; + final_row_counts.push(rows.len() as u64); + final_rows.push(rows); } + Ok(TableWorkloadOutcome 
{ + final_row_counts, + final_rows, + }) } - self.execution.active_writer = None; - } - - fn db(&self) -> Result<&RelationalDB, String> { - self.db - .as_ref() - .ok_or_else(|| "relational db is unavailable during close/reopen".to_string()) } } @@ -1417,24 +786,36 @@ impl TargetPropertyAccess for RelationalDbEngine { } fn lookup_in_connection(&self, conn: SessionId, table: usize, id: u64) -> Result, String> { - Self::lookup_base_row(self, conn, table, id) + self.lookup_base_row(conn, table, id) } fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String> { - Self::collect_rows_in_connection(self, conn, table) + self.collect_rows_in_connection(conn, table) } fn collect_rows_for_table(&self, table: usize) -> Result, String> { let table_id = self.table_id_for_index(table)?; - Self::collect_rows_by_id(self, table_id) + self.collect_rows_by_id(table_id) } fn count_rows(&self, table: usize) -> Result { - Self::count_rows_for_property(self, table) + let table_id = self.table_id_for_index(table)?; + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter(tx, table_id) + .map_err(|err| format!("count rows failed: {err}"))? + .count()) + }) } fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { - Self::count_by_col_eq_for_property(self, table, col, value) + let table_id = self.table_id_for_index(table)?; + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter_by_col_eq(tx, table_id, col, value) + .map_err(|err| format!("count by col eq failed: {err}"))? 
+ .count()) + }) } fn range_scan( @@ -1444,337 +825,24 @@ impl TargetPropertyAccess for RelationalDbEngine { lower: Bound, upper: Bound, ) -> Result, String> { - Self::range_scan_for_property(self, table, cols, lower, upper) - } -} - -impl TargetEngine for RelationalDbEngine { - type Observation = CommitlogObservation; - type Outcome = RelationalDbCommitlogOutcome; - type Error = String; - - #[allow(clippy::manual_async_fn)] - fn execute_interaction<'a>( - &'a mut self, - interaction: &'a CommitlogInteraction, - ) -> impl std::future::Future> + 'a { - async move { self.execute(interaction).await } - } - - fn finish(&mut self) { - Self::finish(self); - } - - #[allow(clippy::manual_async_fn)] - fn collect_outcome<'a>(&'a mut self) -> impl std::future::Future> + 'a { - async move { - RelationalDbEngine::collect_outcome(self) - .await - .map_err(anyhow::Error::msg) - } - } -} - -type StressCommitlogRepo = FaultableRepo; -type StressSnapshotRepo = BuggifiedSnapshotRepo; -type InMemoryCommitlogDurability = Local; - -struct RelationalDbBootstrap { - db: RelationalDB, - commitlog_repo: StressCommitlogRepo, - snapshot_repo: StressSnapshotRepo, - snapshot_worker: SnapshotWorker, - durability: Arc, - durability_opts: spacetimedb_durability::local::Options, -} - -fn bootstrap_relational_db( - seed: DstSeed, - commitlog_fault_config: CommitlogFaultConfig, - snapshot_fault_config: SnapshotFaultConfig, -) -> anyhow::Result { - let runtime = Handle::tokio_current(); - let commitlog_repo = FaultableRepo::new( - MemoryCommitlogRepo::new(8 * 1024 * 1024), - commitlog_fault_config, - seed.fork(702), - ); - let snapshot_repo = BuggifiedSnapshotRepo::new(snapshot_fault_config, seed.fork(703))?; - let durability_opts = commitlog_stress_options(seed.fork(701)); - let durability = Arc::new( - InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), runtime.clone(), durability_opts) - .map_err(|err| anyhow::anyhow!("open in-memory durability failed: {err}"))?, - ); - let 
snapshot_worker = SnapshotWorker::new( - Arc::new(snapshot_repo.clone()), - snapshot::Compression::Disabled, - runtime.clone(), - ); - let persistence = Persistence { - durability: durability.clone(), - disk_size: Arc::new(in_memory_size_on_disk), - snapshot_store: Some(snapshot_worker.snapshot_store()), - snapshots: Some(snapshot_worker.clone()), - runtime, - }; - let (db, connected_clients) = RelationalDB::open( - Identity::ZERO, - Identity::ZERO, - EmptyHistory::new(), - Some(persistence), - None, - PagePool::new_for_test(), - )?; - assert_eq!(connected_clients.len(), 0); - db.with_auto_commit(Workload::Internal, |tx| { - db.set_initialized(tx, Program::empty(HostType::Wasm.into())) - })?; - Ok(RelationalDbBootstrap { - db, - commitlog_repo, - snapshot_repo, - snapshot_worker, - durability, - durability_opts, - }) -} - -fn commitlog_stress_options(seed: DstSeed) -> spacetimedb_durability::local::Options { - let mut opts = spacetimedb_durability::local::Options::default(); - opts.commitlog.max_segment_size = 2 * 1024; - opts.commitlog.offset_index_interval_bytes = NonZeroU64::new(256).expect("256 > 0"); - opts.commitlog.offset_index_require_segment_fsync = seed.0.is_multiple_of(2); - opts.commitlog.write_buffer_size = 512; - opts -} - -fn runtime_alive_tasks() -> Option { - // The shim only exposes Tokio-compatible handles today. Keep this explicit - // until the target owns a simulator/runtime that can report live task state. 
- None -} - -fn schema_summary(schema: &SchemaPlan) -> SchemaSummary { - let initial_tables = schema.tables.len(); - let initial_columns = schema.tables.iter().map(|table| table.columns.len()).sum(); - let max_columns_per_table = schema - .tables - .iter() - .map(|table| table.columns.len()) - .max() - .unwrap_or_default(); - let extra_indexes = schema - .tables - .iter() - .map(|table| table.extra_indexes.len()) - .sum::(); - SchemaSummary { - initial_tables, - initial_columns, - max_columns_per_table, - initial_indexes: initial_tables + extra_indexes, - extra_indexes, - } -} - -fn disk_fault_summary(summary: CommitlogFaultSummary) -> DiskFaultSummary { - DiskFaultSummary { - profile: summary.profile, - latency: summary.latency, - short_read: summary.short_read, - short_write: summary.short_write, - read_error: summary.read_error, - write_error: summary.write_error, - flush_error: summary.flush_error, - fsync_error: summary.fsync_error, - open_error: summary.open_error, - metadata_error: summary.metadata_error, + let table_id = self.table_id_for_index(table)?; + let cols_list = cols.iter().copied().collect::(); + self.with_fresh_read_tx(|db, tx| { + let mut rows = db + .iter_by_col_range(tx, table_id, cols_list, (lower, upper)) + .map_err(|err| format!("range scan failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + }) } } -fn in_memory_size_on_disk() -> io::Result { - Ok(SizeOnDisk::default()) -} - fn is_unique_constraint_violation(err: &DBError) -> bool { - matches!( - err, - DBError::Datastore(DatastoreError::Index(IndexError::UniqueConstraintViolation(_))) - ) + err.to_string().contains("Unique") || err.to_string().contains("unique") } fn is_write_conflict_error(err: &str) -> bool { - err.contains("owns lock") -} - -fn compare_rows_for_range(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { - lhs.project_key(cols) - .to_algebraic_value() - .cmp(&rhs.project_key(cols).to_algebraic_value()) - .then_with(|| lhs.values.cmp(&rhs.values)) -} - -fn dynamic_table_name(slot: u32) -> String { - format!("dst_dynamic_slot_{slot}") -} - -fn dynamic_column_schemas(version: u32) -> Vec { - let mut columns = vec![ - ColumnSchema::for_test(0, "id", AlgebraicType::I64), - ColumnSchema::for_test(1, "value", AlgebraicType::U64), - ]; - for v in 1..=version { - columns.push(ColumnSchema::for_test( - (v + 1) as u16, - format!("migrated_v{v}"), - AlgebraicType::Bool, - )); - } - columns -} - -fn dynamic_probe_row(slot: u32, version: u32) -> SimRow { - let mut values = vec![AlgebraicValue::I64(0), AlgebraicValue::U64(slot as u64)]; - for _ in 1..=version { - values.push(AlgebraicValue::Bool(false)); - } - SimRow { values } -} - -fn dynamic_schema(name: &str, version: u32) -> TableSchema { - let columns = dynamic_column_schemas(version); - let indexes = vec![IndexSchema::for_test(format!("{name}_id_idx"), BTreeAlgorithm::from(0))]; - let constraints = vec![ConstraintSchema::unique_for_test(format!("{name}_id_unique"), 0)]; - let sequences = vec![SequenceSchema { - sequence_id: SequenceId::SENTINEL, - sequence_name: format!("{name}_id_seq").into(), - table_id: TableId::SENTINEL, - col_pos: 0.into(), - increment: 1, - start: 1, 
- min_value: 1, - max_value: i128::MAX, - }]; - TableSchema::new( - TableId::SENTINEL, - TableName::for_test(name), - None, - columns, - indexes, - constraints, - sequences, - StTableType::User, - StAccess::Public, - None, - Some(0.into()), - false, - None, - ) -} - -#[cfg(test)] -mod tests { - use crate::config::CommitlogFaultProfile; - - use super::*; - - fn run_seed_12_with_snapshot_fault( - configure: impl FnOnce(&mut SnapshotFaultConfig), - ) -> RelationalDbCommitlogOutcome { - let seed = DstSeed(12); - let config = RunConfig::with_max_interactions(100).with_commitlog_fault_profile(CommitlogFaultProfile::Off); - let mut snapshot_fault_config = SnapshotFaultConfig::for_profile(CommitlogFaultProfile::Off); - snapshot_fault_config.enabled = true; - configure(&mut snapshot_fault_config); - let mut runtime = sim::Runtime::new(seed).unwrap(); - - runtime - .block_on(async move { - let (source, engine, properties) = build_with_fault_configs( - seed, - TableScenarioId::RandomCrud, - &config, - CommitlogFaultConfig::for_profile(CommitlogFaultProfile::Off), - snapshot_fault_config, - )?; - core::run_streaming(source, engine, properties, config).await - }) - .unwrap() - } - - #[test] - fn seed_12_exercises_snapshot_capture_and_restore() { - let seed = DstSeed(12); - let config = RunConfig::with_max_interactions(100).with_commitlog_fault_profile(CommitlogFaultProfile::Off); - let mut runtime = sim::Runtime::new(seed).unwrap(); - - let outcome = runtime - .block_on(run_generated_with_config_and_scenario( - seed, - TableScenarioId::RandomCrud, - config, - )) - .unwrap(); - - assert_eq!(outcome.interactions.snapshot_requested, 2); - assert_eq!(outcome.interactions.snapshot_created, 2); - assert_eq!(outcome.interactions.close_reopen_applied, 1); - assert!(outcome.replay.durable_offset.is_some()); - assert!(outcome.replay.restored_snapshot_offset.is_some()); - assert!(outcome.replay.restored_snapshot_offset <= outcome.replay.durable_offset); - } - - #[test] - fn 
targeted_snapshot_open_faults_are_skipped_and_replay_matches_model() { - let outcome = run_seed_12_with_snapshot_fault(|config| config.open_error_prob = 1.0); - - assert_eq!(outcome.interactions.snapshot_requested, 2); - assert_eq!(outcome.interactions.snapshot_created, 0); - assert_eq!(outcome.interactions.snapshot_skipped, 2); - assert!(outcome.snapshot_faults.open_error > 0); - assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); - } - - #[test] - fn targeted_snapshot_metadata_faults_are_retryable_on_reopen() { - let outcome = run_seed_12_with_snapshot_fault(|config| config.metadata_error_prob = 1.0); - - assert_eq!(outcome.interactions.close_reopen_applied, 1); - assert!(outcome.snapshot_faults.metadata_error > 0); - assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); - } - - #[test] - fn targeted_snapshot_read_faults_are_retryable_on_reopen() { - let outcome = run_seed_12_with_snapshot_fault(|config| config.read_error_prob = 1.0); - - assert_eq!(outcome.interactions.snapshot_created, 2); - assert!(outcome.snapshot_faults.read_error > 0); - assert!(outcome.replay.restored_snapshot_offset.is_some()); - assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); - } - - #[test] - fn targeted_snapshot_write_faults_do_not_publish_new_snapshots() { - let outcome = run_seed_12_with_snapshot_fault(|config| config.write_error_prob = 1.0); - - assert_eq!(outcome.interactions.snapshot_requested, 2); - assert_eq!(outcome.interactions.snapshot_created, 0); - assert_eq!(outcome.interactions.snapshot_skipped, 2); - assert!(outcome.snapshot_faults.write_error > 0); - assert!(outcome.replay.restored_snapshot_offset.is_none()); - assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); - } - - #[test] - fn targeted_snapshot_fsync_faults_do_not_publish_new_snapshots() { - let outcome = run_seed_12_with_snapshot_fault(|config| config.fsync_error_prob = 1.0); - - assert_eq!(outcome.interactions.snapshot_requested, 2); - 
assert_eq!(outcome.interactions.snapshot_created, 0); - assert_eq!(outcome.interactions.snapshot_skipped, 2); - assert!(outcome.snapshot_faults.fsync_error > 0); - assert!(outcome.replay.restored_snapshot_offset.is_none()); - assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); - } + err.contains("WriteConflict") || err.contains("write conflict") || err.contains("Serialization failure") } diff --git a/crates/dst/src/targets/relational_db_concurrent.rs b/crates/dst/src/targets/relational_db_concurrent.rs deleted file mode 100644 index f0299470779..00000000000 --- a/crates/dst/src/targets/relational_db_concurrent.rs +++ /dev/null @@ -1,1045 +0,0 @@ -//! Concurrent RelationalDB API target. -//! -//! The target models concurrency at RelationalDB lock boundaries. A generated -//! round may hold one or more read transactions, or one write transaction, and -//! then probe whether another client can acquire the write lock. Once a client -//! owns a `Tx` or `MutTx`, that section is synchronous: no simulator yield or -//! async boundary is allowed until the transaction is released, committed, or -//! rolled back. 
- -use std::{collections::BTreeMap, fmt}; - -use spacetimedb_core::{ - db::relational_db::{MutTx as RelMutTx, RelationalDB, Tx as RelTx}, - error::DBError, - messages::control_db::HostType, -}; -use spacetimedb_datastore::{execution_context::Workload, traits::IsolationLevel}; -use spacetimedb_durability::EmptyHistory; -use spacetimedb_lib::{ - db::auth::{StAccess, StTableType}, - Identity, -}; -use spacetimedb_primitives::TableId; -use spacetimedb_sats::AlgebraicValue; -use spacetimedb_schema::{ - def::BTreeAlgorithm, - schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, - table_name::TableName, -}; -use spacetimedb_table::page_pool::PagePool; -use tracing::info; - -use crate::{ - client::SessionId, - config::RunConfig, - core::{self, StreamingProperties, TargetEngine, WorkloadSource}, - schema::SimRow, - seed::{DstRng, DstSeed}, -}; - -pub async fn run_generated_with_config( - seed: DstSeed, - config: RunConfig, -) -> anyhow::Result { - let source = ConcurrentWorkloadSource::new(seed, config.max_interactions_or_default(usize::MAX)); - let engine = ConcurrentRelationalDbEngine::new()?; - let outcome = core::run_streaming(source, engine, ConcurrentProperties, config).await?; - info!( - rounds = outcome.rounds, - committed = outcome.committed, - conflicts = outcome.write_conflicts, - "relational_db_concurrent complete" - ); - Ok(outcome) -} - -#[derive(Clone, Debug)] -struct RoundPlan { - id: u64, - kind: RoundKind, - shared: SimRow, - extra: SimRow, -} - -#[derive(Clone, Copy, Debug)] -enum RoundKind { - WriterBlocksWriter, - ReadersBlockWriter, - MultiReaderSnapshot, - MixedReadWrite, -} - -struct ConcurrentWorkloadSource { - rng: DstRng, - emitted: usize, - target: usize, - next_id: u64, -} - -impl ConcurrentWorkloadSource { - fn new(seed: DstSeed, target: usize) -> Self { - Self { - rng: seed.fork(910).rng(), - emitted: 0, - target, - next_id: seed.fork(911).0.max(1), - } - } - - fn make_row(&mut self) -> SimRow { - let id = self.next_id; - 
self.next_id = self.next_id.wrapping_add(1).max(1); - SimRow { - values: vec![ - AlgebraicValue::U64(id), - AlgebraicValue::U64(self.rng.next_u64() % 1_000), - ], - } - } - - fn make_round(&mut self, id: u64) -> RoundPlan { - RoundPlan { - id, - kind: match id % 4 { - 0 => RoundKind::WriterBlocksWriter, - 1 => RoundKind::ReadersBlockWriter, - 2 => RoundKind::MultiReaderSnapshot, - _ => RoundKind::MixedReadWrite, - }, - shared: self.make_row(), - extra: self.make_row(), - } - } -} - -impl WorkloadSource for ConcurrentWorkloadSource { - type Interaction = RoundPlan; - - fn next_interaction(&mut self) -> Option { - if self.emitted >= self.target { - return None; - } - let round = self.make_round(self.emitted as u64); - self.emitted += 1; - Some(round) - } - - fn request_finish(&mut self) { - self.target = self.emitted; - } -} - -struct ConcurrentRelationalDbEngine { - db: RelationalDB, - table_id: TableId, - events: Vec, -} - -impl ConcurrentRelationalDbEngine { - fn new() -> anyhow::Result { - let (db, connected_clients) = RelationalDB::open( - Identity::ZERO, - Identity::ZERO, - EmptyHistory::new(), - None, - None, - PagePool::new_for_test(), - )?; - assert_eq!(connected_clients.len(), 0); - db.with_auto_commit(Workload::Internal, |tx| { - db.set_initialized(tx, spacetimedb_datastore::traits::Program::empty(HostType::Wasm.into())) - })?; - - let table_id = install_concurrent_schema(&db)?; - Ok(Self { - db, - table_id, - events: Vec::new(), - }) - } - - fn execute_round(&mut self, round: &RoundPlan) -> Result { - let mut machine = RoundMachine::new(&self.db, self.table_id, round.id, 4); - let events = machine.run(round)?; - self.events.extend(events.clone()); - Ok(RoundObservation { - round: round.id, - events, - }) - } - - fn collect_rows(&self) -> Result, String> { - let tx = self.db.begin_tx(Workload::ForTests); - let result = collect_rows_in_tx(&self.db, self.table_id, &tx, "collect rows"); - let _ = self.db.release_tx(tx); - result - } -} - -impl TargetEngine 
for ConcurrentRelationalDbEngine { - type Observation = RoundObservation; - type Outcome = RelationalDbConcurrentOutcome; - type Error = String; - - fn execute_interaction<'a>( - &'a mut self, - interaction: &'a RoundPlan, - ) -> impl Future> + 'a { - async move { self.execute_round(interaction) } - } - - fn finish(&mut self) {} - - fn collect_outcome<'a>(&'a mut self) -> impl Future> + 'a { - async move { - let final_rows = self.collect_rows().map_err(anyhow::Error::msg)?; - let expected_rows = expected_rows_from_events(&self.events); - let summary = ConcurrentSummary::from_events(&self.events); - Ok(RelationalDbConcurrentOutcome { - rounds: summary.rounds, - clients: summary.clients, - events: summary.events, - reads: summary.reads, - committed: summary.committed, - write_conflicts: summary.write_conflicts, - writer_conflicts: summary.writer_conflicts, - reader_conflicts: summary.reader_conflicts, - final_rows, - expected_rows, - }) - } - } -} - -struct RoundMachine<'a> { - db: &'a RelationalDB, - table_id: TableId, - round: u64, - clients: Vec, - events: Vec, -} - -impl<'a> RoundMachine<'a> { - fn new(db: &'a RelationalDB, table_id: TableId, round: u64, clients: usize) -> Self { - Self { - db, - table_id, - round, - clients: (0..clients).map(|_| ClientState::Idle).collect(), - events: Vec::new(), - } - } - - fn run(&mut self, round: &RoundPlan) -> Result, String> { - let result = match round.kind { - RoundKind::WriterBlocksWriter => self.writer_blocks_writer(round), - RoundKind::ReadersBlockWriter => self.readers_block_writer(round), - RoundKind::MultiReaderSnapshot => self.multi_reader_snapshot(round), - RoundKind::MixedReadWrite => self.mixed_read_write(round), - }; - let cleanup = self.cleanup(); - result.and(cleanup)?; - Ok(std::mem::take(&mut self.events)) - } - - fn writer_blocks_writer(&mut self, round: &RoundPlan) -> Result<(), String> { - self.begin_write(client(0))?; - self.insert(client(0), round.shared.clone())?; - 
self.expect_write_conflict(client(1), ConflictReason::WriterHeld)?; - self.commit(client(0))?; - - self.begin_write(client(1))?; - self.insert(client(1), round.extra.clone())?; - self.commit(client(1)) - } - - fn readers_block_writer(&mut self, round: &RoundPlan) -> Result<(), String> { - self.begin_read(client(0))?; - self.begin_read(client(1))?; - self.full_scan(client(0))?; - self.full_scan(client(1))?; - self.expect_write_conflict(client(2), ConflictReason::ReadersHeld)?; - self.release_read(client(0))?; - self.release_read(client(1))?; - - self.begin_write(client(2))?; - self.insert(client(2), round.shared.clone())?; - self.commit(client(2)) - } - - fn multi_reader_snapshot(&mut self, round: &RoundPlan) -> Result<(), String> { - self.begin_read(client(0))?; - self.begin_read(client(1))?; - let snapshot_0 = self.full_scan(client(0))?; - let snapshot_1 = self.full_scan(client(1))?; - if snapshot_0 != snapshot_1 { - return Err(format!( - "[ConcurrentRelationalDb] round={} readers observed different snapshots: left={snapshot_0:?} right={snapshot_1:?}", - self.round - )); - } - self.release_read(client(0))?; - self.release_read(client(1))?; - - self.begin_write(client(2))?; - self.insert(client(2), round.shared.clone())?; - self.commit(client(2))?; - - self.begin_read(client(3))?; - self.point_lookup(client(3), round.shared.id().ok_or("generated row missing id")?)?; - self.release_read(client(3)) - } - - fn mixed_read_write(&mut self, round: &RoundPlan) -> Result<(), String> { - self.begin_write(client(0))?; - self.insert(client(0), round.shared.clone())?; - self.commit(client(0))?; - - self.begin_read(client(1))?; - self.point_lookup(client(1), round.shared.id().ok_or("generated row missing id")?)?; - self.release_read(client(1))?; - - self.begin_write(client(2))?; - self.delete(client(2), round.shared.clone())?; - self.rollback(client(2)); - - self.begin_write(client(3))?; - self.insert(client(3), round.extra.clone())?; - self.commit(client(3)) - } - - fn 
begin_read(&mut self, client: SessionId) -> Result<(), String> { - if self.any_writer() { - return Err(format!( - "[ConcurrentRelationalDb] round={} client={} would block beginning read while writer is held", - self.round, client - )); - } - self.expect_idle(client, "begin_read")?; - self.record_action(client, "begin_read"); - let tx = self.db.begin_tx(Workload::ForTests); - self.replace(client, ClientState::Reading { tx }); - Ok(()) - } - - fn release_read(&mut self, client: SessionId) -> Result<(), String> { - self.record_action(client, "release_read"); - match self.take(client)? { - ClientState::Reading { tx } => { - let _ = self.db.release_tx(tx); - self.replace(client, ClientState::Idle); - Ok(()) - } - state => { - self.replace(client, state); - Err(self.invalid_state(client, "release_read")) - } - } - } - - fn begin_write(&mut self, client: SessionId) -> Result<(), String> { - if self.try_begin_write(client)? { - Ok(()) - } else { - Err(format!( - "[ConcurrentRelationalDb] round={} client={} expected write lock to be available", - self.round, client - )) - } - } - - fn expect_write_conflict(&mut self, client: SessionId, reason: ConflictReason) -> Result<(), String> { - if self.try_begin_write(client)? { - self.rollback(client); - return Err(format!( - "[ConcurrentRelationalDb] round={} client={} unexpectedly acquired write lock", - self.round, client - )); - } - match self.events.last() { - Some(RoundEvent::WriteConflict { reason: observed, .. 
}) if *observed == reason => Ok(()), - Some(event) => Err(format!( - "[ConcurrentRelationalDb] round={} expected conflict reason {reason:?}, observed {event}", - self.round - )), - None => Err(format!( - "[ConcurrentRelationalDb] round={} expected write conflict event", - self.round - )), - } - } - - fn try_begin_write(&mut self, client: SessionId) -> Result { - self.expect_idle(client, "try_begin_write")?; - self.record_action(client, "try_begin_write"); - match self - .db - .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) - { - Some(tx) => { - self.replace( - client, - ClientState::Writing { - tx, - pending: Vec::new(), - }, - ); - self.events.push(RoundEvent::WriteLockAcquired { - round: self.round, - client, - }); - Ok(true) - } - None => { - self.events.push(RoundEvent::WriteConflict { - round: self.round, - client, - reason: self.conflict_reason(), - }); - Ok(false) - } - } - } - - fn insert(&mut self, client: SessionId, row: SimRow) -> Result<(), String> { - self.record_action(client, "insert"); - let table_id = self.table_id; - let db = self.db; - self.with_writer(client, |tx, pending| { - let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; - match db.insert(tx, table_id, &bsatn) { - Ok((_, row_ref, _)) => { - pending.push(ConcurrentMutation::Inserted(SimRow::from_product_value( - row_ref.to_product_value(), - ))); - Ok(()) - } - Err(err) if is_unique_constraint_violation(&err) => Ok(()), - Err(err) => Err(format!("insert failed: {err}")), - } - }) - } - - fn delete(&mut self, client: SessionId, row: SimRow) -> Result<(), String> { - self.record_action(client, "delete"); - let table_id = self.table_id; - let db = self.db; - self.with_writer(client, |tx, pending| { - match db.delete_by_rel(tx, table_id, [row.to_product_value()]) { - 0 => Ok(()), - 1 => { - pending.push(ConcurrentMutation::Deleted(row)); - Ok(()) - } - deleted => Err(format!("delete affected {deleted} rows")), - } - }) - } - - fn commit(&mut self, client: SessionId) 
-> Result<(), String> { - self.record_action(client, "commit"); - match self.take(client)? { - ClientState::Writing { tx, mut pending } => { - let committed = self - .db - .commit_tx(tx) - .map_err(|err| format!("commit failed: {err}"))? - .ok_or_else(|| "commit returned no tx data".to_string())?; - self.events.push(RoundEvent::Committed { - round: self.round, - client, - tx_offset: committed.0, - mutations: std::mem::take(&mut pending), - }); - self.replace(client, ClientState::Idle); - Ok(()) - } - state => { - self.replace(client, state); - Err(self.invalid_state(client, "commit")) - } - } - } - - fn rollback(&mut self, client: SessionId) { - self.record_action(client, "rollback"); - match self.take(client) { - Ok(ClientState::Writing { tx, .. }) => { - let _ = self.db.rollback_mut_tx(tx); - self.events.push(RoundEvent::RolledBack { - round: self.round, - client, - }); - self.replace(client, ClientState::Idle); - } - Ok(state) => self.replace(client, state), - Err(_) => {} - } - } - - fn full_scan(&mut self, client: SessionId) -> Result { - self.record_action(client, "full_scan"); - let summary = self.with_reader(client, |tx| scan_summary_in_tx(self.db, self.table_id, tx, "full scan"))?; - self.events.push(RoundEvent::Read { - round: self.round, - client, - kind: ReadKind::FullScan, - summary, - }); - Ok(summary) - } - - fn point_lookup(&mut self, client: SessionId, id: u64) -> Result { - self.record_action(client, "point_lookup"); - let summary = self.with_reader(client, |tx| point_lookup_summary_in_tx(self.db, self.table_id, tx, id))?; - self.events.push(RoundEvent::Read { - round: self.round, - client, - kind: ReadKind::PointLookup { id }, - summary, - }); - Ok(summary) - } - - fn with_writer( - &mut self, - client: SessionId, - f: impl FnOnce(&mut RelMutTx, &mut Vec) -> Result, - ) -> Result { - match self.state_mut(client)? 
{ - ClientState::Writing { tx, pending } => f(tx, pending), - _ => Err(self.invalid_state(client, "write operation")), - } - } - - fn with_reader(&self, client: SessionId, f: impl FnOnce(&RelTx) -> Result) -> Result { - match self.state(client)? { - ClientState::Reading { tx } => f(tx), - _ => Err(self.invalid_state(client, "read operation")), - } - } - - fn cleanup(&mut self) -> Result<(), String> { - let mut leaked = None; - for index in 0..self.clients.len() { - let client = SessionId::from_index(index); - match self.take(client)? { - ClientState::Idle => self.replace(client, ClientState::Idle), - ClientState::Reading { tx } => { - let _ = self.db.release_tx(tx); - self.replace(client, ClientState::Idle); - leaked.get_or_insert_with(|| { - format!( - "[ConcurrentRelationalDb] round={} client={} leaked read transaction", - self.round, client - ) - }); - } - ClientState::Writing { tx, .. } => { - let _ = self.db.rollback_mut_tx(tx); - self.replace(client, ClientState::Idle); - leaked.get_or_insert_with(|| { - format!( - "[ConcurrentRelationalDb] round={} client={} leaked write transaction", - self.round, client - ) - }); - } - } - } - match leaked { - Some(err) => Err(err), - None => Ok(()), - } - } - - fn conflict_reason(&self) -> ConflictReason { - if self.any_writer() { - ConflictReason::WriterHeld - } else if self.any_reader() { - ConflictReason::ReadersHeld - } else { - ConflictReason::Unknown - } - } - - fn any_reader(&self) -> bool { - self.clients.iter().any(ClientState::is_reading) - } - - fn any_writer(&self) -> bool { - self.clients.iter().any(ClientState::is_writing) - } - - fn expect_idle(&self, client: SessionId, action: &'static str) -> Result<(), String> { - if self.state(client)?.is_idle() { - Ok(()) - } else { - Err(self.invalid_state(client, action)) - } - } - - fn record_action(&mut self, client: SessionId, name: &'static str) { - self.events.push(RoundEvent::Action { - round: self.round, - client, - name, - }); - } - - fn state(&self, client: 
SessionId) -> Result<&ClientState, String> { - self.clients - .get(client.as_index()) - .ok_or_else(|| format!("[ConcurrentRelationalDb] unknown client {client}")) - } - - fn state_mut(&mut self, client: SessionId) -> Result<&mut ClientState, String> { - self.clients - .get_mut(client.as_index()) - .ok_or_else(|| format!("[ConcurrentRelationalDb] unknown client {client}")) - } - - fn take(&mut self, client: SessionId) -> Result { - let state = self.state_mut(client)?; - Ok(std::mem::replace(state, ClientState::Idle)) - } - - fn replace(&mut self, client: SessionId, state: ClientState) { - self.clients[client.as_index()] = state; - } - - fn invalid_state(&self, client: SessionId, action: &str) -> String { - format!( - "[ConcurrentRelationalDb] round={} client={} cannot {action} from {}", - self.round, - client, - self.state(client).map(ClientState::name).unwrap_or("unknown") - ) - } -} - -enum ClientState { - Idle, - Reading { - tx: RelTx, - }, - Writing { - tx: RelMutTx, - pending: Vec, - }, -} - -impl ClientState { - fn name(&self) -> &'static str { - match self { - Self::Idle => "idle", - Self::Reading { .. } => "reading", - Self::Writing { .. } => "writing", - } - } - - fn is_idle(&self) -> bool { - matches!(self, Self::Idle) - } - - fn is_reading(&self) -> bool { - matches!(self, Self::Reading { .. }) - } - - fn is_writing(&self) -> bool { - matches!(self, Self::Writing { .. 
}) - } -} - -#[derive(Clone, Debug)] -struct RoundObservation { - round: u64, - events: Vec, -} - -#[derive(Clone, Debug)] -pub struct RelationalDbConcurrentOutcome { - pub rounds: usize, - pub clients: usize, - pub events: usize, - pub reads: usize, - pub committed: usize, - pub write_conflicts: usize, - pub writer_conflicts: usize, - pub reader_conflicts: usize, - pub final_rows: Vec, - pub expected_rows: Vec, -} - -#[derive(Clone, Debug)] -enum RoundEvent { - Action { - round: u64, - client: SessionId, - name: &'static str, - }, - WriteLockAcquired { - round: u64, - client: SessionId, - }, - WriteConflict { - round: u64, - client: SessionId, - reason: ConflictReason, - }, - Committed { - round: u64, - client: SessionId, - tx_offset: u64, - mutations: Vec, - }, - RolledBack { - round: u64, - client: SessionId, - }, - Read { - round: u64, - client: SessionId, - kind: ReadKind, - summary: ReadSummary, - }, -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum ConflictReason { - WriterHeld, - ReadersHeld, - Unknown, -} - -#[derive(Clone, Debug)] -enum ReadKind { - FullScan, - PointLookup { id: u64 }, -} - -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -struct ReadSummary { - row_count: usize, - checksum: u64, -} - -impl ReadSummary { - fn add_row(&mut self, row: &SimRow, label: &'static str) -> Result<(), String> { - self.row_count += 1; - self.checksum = self.checksum.wrapping_add(concurrent_row_checksum(row, label)?); - Ok(()) - } -} - -#[derive(Clone, Debug)] -enum ConcurrentMutation { - Inserted(SimRow), - Deleted(SimRow), -} - -#[derive(Default)] -struct ConcurrentSummary { - rounds: usize, - clients: usize, - events: usize, - reads: usize, - committed: usize, - write_conflicts: usize, - writer_conflicts: usize, - reader_conflicts: usize, -} - -impl ConcurrentSummary { - fn from_events(events: &[RoundEvent]) -> Self { - let mut summary = Self::default(); - let mut max_round = None; - let mut max_client = None; - - for event in events { - 
summary.events += 1; - let (round, client) = event.position(); - max_round = Some(max_round.unwrap_or(round).max(round)); - max_client = Some(max_client.unwrap_or(client.as_index()).max(client.as_index())); - - match event { - RoundEvent::WriteConflict { reason, .. } => { - summary.write_conflicts += 1; - match reason { - ConflictReason::WriterHeld => summary.writer_conflicts += 1, - ConflictReason::ReadersHeld => summary.reader_conflicts += 1, - ConflictReason::Unknown => {} - } - } - RoundEvent::Committed { .. } => summary.committed += 1, - RoundEvent::Read { .. } => summary.reads += 1, - RoundEvent::Action { .. } | RoundEvent::WriteLockAcquired { .. } | RoundEvent::RolledBack { .. } => {} - } - } - - summary.rounds = max_round.map(|round| round as usize + 1).unwrap_or_default(); - summary.clients = max_client.map(|client| client + 1).unwrap_or_default(); - summary - } -} - -impl RoundEvent { - fn position(&self) -> (u64, SessionId) { - match self { - Self::Action { round, client, .. } - | Self::WriteLockAcquired { round, client } - | Self::WriteConflict { round, client, .. } - | Self::Committed { round, client, .. } - | Self::RolledBack { round, client } - | Self::Read { round, client, .. } => (*round, *client), - } - } -} - -struct ConcurrentProperties; - -impl StreamingProperties for ConcurrentProperties { - fn observe( - &mut self, - _engine: &ConcurrentRelationalDbEngine, - _interaction: &RoundPlan, - observation: &RoundObservation, - ) -> Result<(), String> { - if observation.events.is_empty() { - return Err(format!( - "[ConcurrentRelationalDb] round={} produced no events", - observation.round - )); - } - - for event in &observation.events { - if let RoundEvent::Read { - kind: ReadKind::PointLookup { id }, - summary, - .. 
- } = event - { - if summary.row_count > 1 { - return Err(format!( - "[ConcurrentRelationalDb] round={} invalid point lookup id={id}: {summary:?}", - observation.round - )); - } - } - } - Ok(()) - } - - fn finish( - &mut self, - _engine: &ConcurrentRelationalDbEngine, - outcome: &RelationalDbConcurrentOutcome, - ) -> Result<(), String> { - if outcome.final_rows != outcome.expected_rows { - return Err(format!( - "[ConcurrentRelationalDb] final rows differ from commit-offset oracle: expected={:?} actual={:?}", - outcome.expected_rows, outcome.final_rows - )); - } - if outcome.writer_conflicts == 0 { - return Err("[ConcurrentRelationalDb] no writer-held lock contention was observed".to_string()); - } - if outcome.reader_conflicts == 0 { - return Err("[ConcurrentRelationalDb] no reader-held lock contention was observed".to_string()); - } - if outcome.reads == 0 { - return Err("[ConcurrentRelationalDb] no read sections were observed".to_string()); - } - Ok(()) - } -} - -fn collect_rows_in_tx( - db: &RelationalDB, - table_id: TableId, - tx: &RelTx, - label: &'static str, -) -> Result, String> { - let mut rows = db - .iter(tx, table_id) - .map_err(|err| format!("{label} failed: {err}"))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); - Ok(rows) -} - -fn scan_summary_in_tx( - db: &RelationalDB, - table_id: TableId, - tx: &RelTx, - label: &'static str, -) -> Result { - let mut summary = ReadSummary::default(); - for row_ref in db.iter(tx, table_id).map_err(|err| format!("{label} failed: {err}"))? 
{ - let row = SimRow::from_product_value(row_ref.to_product_value()); - summary.add_row(&row, label)?; - } - Ok(summary) -} - -fn point_lookup_summary_in_tx( - db: &RelationalDB, - table_id: TableId, - tx: &RelTx, - id: u64, -) -> Result { - let value = AlgebraicValue::U64(id); - let mut summary = ReadSummary::default(); - for row_ref in db - .iter_by_col_eq(tx, table_id, 0u16, &value) - .map_err(|err| format!("point lookup failed: {err}"))? - { - let row = SimRow::from_product_value(row_ref.to_product_value()); - if row.id() != Some(id) { - return Err(format!( - "[ConcurrentRelationalDb] point lookup id={id} returned different row: {row:?}" - )); - } - summary.add_row(&row, "point lookup")?; - } - Ok(summary) -} - -fn concurrent_row_checksum(row: &SimRow, label: &'static str) -> Result { - let id = row - .id() - .ok_or_else(|| format!("[ConcurrentRelationalDb] {label} row missing u64 id: {row:?}"))?; - let value = match row.values.get(1) { - Some(AlgebraicValue::U64(value)) => *value, - other => { - return Err(format!( - "[ConcurrentRelationalDb] {label} row has invalid value column: {other:?} in {row:?}" - )); - } - }; - - Ok(mix64(id) - .wrapping_add(mix64(value ^ 0xa076_1d64_78bd_642f)) - .wrapping_add(mix64(row.values.len() as u64))) -} - -fn mix64(mut value: u64) -> u64 { - value = (value ^ (value >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); - value = (value ^ (value >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); - value ^ (value >> 31) -} - -fn expected_rows_from_events(events: &[RoundEvent]) -> Vec { - let mut commits = events - .iter() - .filter_map(|event| match event { - RoundEvent::Committed { - tx_offset, mutations, .. 
- } => Some((*tx_offset, mutations)), - _ => None, - }) - .collect::>(); - commits.sort_by_key(|(tx_offset, _)| *tx_offset); - - let mut rows = BTreeMap::::new(); - for (_tx_offset, mutations) in commits { - for mutation in mutations { - match mutation { - ConcurrentMutation::Inserted(row) => { - if let Some(id) = row.id() { - rows.insert(id, row.clone()); - } - } - ConcurrentMutation::Deleted(row) => { - if let Some(id) = row.id() { - rows.remove(&id); - } - } - } - } - } - rows.into_values().collect() -} - -fn install_concurrent_schema(db: &RelationalDB) -> anyhow::Result { - let mut tx = db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); - let table_id = db.create_table( - &mut tx, - TableSchema::new( - TableId::SENTINEL, - TableName::for_test("concurrent_rows"), - None, - vec![ - ColumnSchema::for_test(0, "id", spacetimedb_sats::AlgebraicType::U64), - ColumnSchema::for_test(1, "value", spacetimedb_sats::AlgebraicType::U64), - ], - vec![IndexSchema::for_test("concurrent_rows_id_idx", BTreeAlgorithm::from(0))], - vec![ConstraintSchema::unique_for_test("concurrent_rows_id_unique", 0)], - vec![], - StTableType::User, - StAccess::Public, - None, - Some(0.into()), - false, - None, - ), - )?; - let _ = db.commit_tx(tx)?; - Ok(table_id) -} - -fn client(index: usize) -> SessionId { - SessionId::from_index(index) -} - -fn is_unique_constraint_violation(err: &DBError) -> bool { - err.to_string().contains("Unique") || err.to_string().contains("unique") -} - -impl fmt::Display for RoundEvent { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Action { name, .. 
} => write!(f, "action({name})"), - event => write!(f, "{event:?}"), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::sim; - - #[test] - fn seed_12_exercises_lock_state_machine() { - let seed = DstSeed(12); - let config = RunConfig::with_max_interactions(100); - let mut runtime = sim::Runtime::new(seed).unwrap(); - - let outcome = runtime.block_on(run_generated_with_config(seed, config)).unwrap(); - - assert_eq!(outcome.rounds, 100); - assert!(outcome.committed > 0); - assert!(outcome.writer_conflicts > 0); - assert!(outcome.reader_conflicts > 0); - assert!(outcome.reads > 0); - assert_eq!(outcome.final_rows, outcome.expected_rows); - } - - #[test] - fn first_four_rounds_cover_core_lock_cases() { - let seed = DstSeed(12); - let config = RunConfig::with_max_interactions(4); - let mut runtime = sim::Runtime::new(seed).unwrap(); - - let outcome = runtime.block_on(run_generated_with_config(seed, config)).unwrap(); - - assert_eq!(outcome.rounds, 4); - assert_eq!(outcome.writer_conflicts, 1); - assert_eq!(outcome.reader_conflicts, 1); - assert!(outcome.reads >= 4); - assert_eq!(outcome.final_rows, outcome.expected_rows); - } -} diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs deleted file mode 100644 index 4e6a173c6a8..00000000000 --- a/crates/dst/src/workload/commitlog_ops/generation.rs +++ /dev/null @@ -1,279 +0,0 @@ -//! Commitlog workload source: table workload plus lifecycle and durability pressure. - -use std::collections::{BTreeSet, VecDeque}; - -use crate::{ - core::WorkloadSource, - schema::SchemaPlan, - seed::{DstRng, DstSeed}, - workload::strategy::{Index, Percent, Strategy}, - workload::{ - commitlog_ops::CommitlogInteraction, - table_ops::{strategies::ConnectionChoice, TableScenario, TableWorkloadSource}, - }, -}; - -/// Generation profile for commitlog-specific interactions layered around table ops. 
-#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub(crate) struct CommitlogWorkloadProfile { - pub(crate) close_reopen_pct: usize, - pub(crate) snapshot_pct: usize, - pub(crate) create_dynamic_table_pct: usize, - pub(crate) migrate_after_create_pct: usize, - pub(crate) migrate_dynamic_table_pct: usize, - pub(crate) drop_dynamic_table_pct: usize, -} - -impl Default for CommitlogWorkloadProfile { - fn default() -> Self { - Self { - close_reopen_pct: 1, - snapshot_pct: 2, - create_dynamic_table_pct: 1, - migrate_after_create_pct: 55, - migrate_dynamic_table_pct: 6, - drop_dynamic_table_pct: 5, - } - } -} - -/// Streaming source for commitlog-oriented targets. -/// -/// This composes a base table workload with commitlog lifecycle interactions -/// instead of defining an unrelated workload language. -pub(crate) struct CommitlogWorkloadSource { - base: TableWorkloadSource, - profile: CommitlogWorkloadProfile, - rng: DstRng, - num_connections: usize, - next_slot: u32, - alive_slots: BTreeSet, - pending: VecDeque, -} - -impl CommitlogWorkloadSource { - pub fn new( - seed: DstSeed, - scenario: S, - schema: SchemaPlan, - num_connections: usize, - target_interactions: usize, - ) -> Self { - Self::with_profile( - seed, - scenario, - schema, - num_connections, - target_interactions, - CommitlogWorkloadProfile::default(), - ) - } - - pub fn with_profile( - seed: DstSeed, - scenario: S, - schema: SchemaPlan, - num_connections: usize, - target_interactions: usize, - profile: CommitlogWorkloadProfile, - ) -> Self { - Self { - base: TableWorkloadSource::new(seed.fork(123), scenario, schema, num_connections, target_interactions), - profile, - rng: seed.fork(124).rng(), - num_connections, - next_slot: 0, - alive_slots: BTreeSet::new(), - pending: VecDeque::new(), - } - } - - pub fn request_finish(&mut self) { - self.base.request_finish(); - } - - fn fill_pending(&mut self) -> bool { - let Some(base_op) = self.base.next() else { - return false; - }; - 
self.pending.push_back(CommitlogInteraction::Table(base_op)); - - if self.base.has_open_read_tx() || self.base.has_open_write_tx() { - return true; - } - - if Percent::new(self.profile.close_reopen_pct).sample(&mut self.rng) { - self.pending.push_back(CommitlogInteraction::CloseReopen); - } - - if Percent::new(self.profile.create_dynamic_table_pct).sample(&mut self.rng) { - let conn = ConnectionChoice { - connection_count: self.num_connections, - } - .sample(&mut self.rng); - let slot = self.next_slot; - self.next_slot = self.next_slot.saturating_add(1); - self.alive_slots.insert(slot); - self.pending - .push_back(CommitlogInteraction::CreateDynamicTable { conn, slot }); - // Frequently follow a create with migration to stress add-column + - // copy + subsequent auto-inc allocation paths. - if Percent::new(self.profile.migrate_after_create_pct).sample(&mut self.rng) { - self.pending - .push_back(CommitlogInteraction::MigrateDynamicTable { conn, slot }); - } - return true; - } - - if !self.alive_slots.is_empty() && Percent::new(self.profile.migrate_dynamic_table_pct).sample(&mut self.rng) { - let conn = ConnectionChoice { - connection_count: self.num_connections, - } - .sample(&mut self.rng); - let idx = Index::new(self.alive_slots.len()).sample(&mut self.rng); - let slot = *self - .alive_slots - .iter() - .nth(idx) - .expect("slot index within alive set bounds"); - self.pending - .push_back(CommitlogInteraction::MigrateDynamicTable { conn, slot }); - } - - if !self.alive_slots.is_empty() && Percent::new(self.profile.drop_dynamic_table_pct).sample(&mut self.rng) { - let conn = ConnectionChoice { - connection_count: self.num_connections, - } - .sample(&mut self.rng); - let idx = Index::new(self.alive_slots.len()).sample(&mut self.rng); - let slot = *self - .alive_slots - .iter() - .nth(idx) - .expect("slot index within alive set bounds"); - self.alive_slots.remove(&slot); - self.pending - .push_back(CommitlogInteraction::DropDynamicTable { conn, slot }); - } - - true 
- } -} - -impl CommitlogWorkloadSource { - pub fn pull_next_interaction(&mut self) -> Option { - loop { - if let Some(next) = self.pending.pop_front() { - return Some(next); - } - if !self.fill_pending() { - return None; - } - } - } -} - -impl WorkloadSource for CommitlogWorkloadSource { - type Interaction = CommitlogInteraction; - - fn next_interaction(&mut self) -> Option { - self.pull_next_interaction() - } - - fn request_finish(&mut self) { - Self::request_finish(self); - } -} - -impl Iterator for CommitlogWorkloadSource { - type Item = CommitlogInteraction; - - fn next(&mut self) -> Option { - self.pull_next_interaction() - } -} - -#[cfg(test)] -mod tests { - use spacetimedb_sats::AlgebraicType; - - use crate::{ - client::SessionId, - schema::{ColumnPlan, SchemaPlan, TablePlan}, - seed::{DstRng, DstSeed}, - workload::{ - commitlog_ops::CommitlogInteraction, - table_ops::{ScenarioPlanner, TableOperation, TableScenario, TableWorkloadInteraction}, - }, - }; - - use super::{CommitlogWorkloadProfile, CommitlogWorkloadSource}; - - #[derive(Clone)] - struct BeginThenCommitScenario; - - impl TableScenario for BeginThenCommitScenario { - fn generate_schema(&self, _rng: &mut DstRng) -> SchemaPlan { - SchemaPlan { - tables: vec![TablePlan { - name: "test_table".to_string(), - columns: vec![ColumnPlan { - name: "id".to_string(), - ty: AlgebraicType::U64, - }], - extra_indexes: vec![], - }], - } - } - - fn validate_outcome( - &self, - _schema: &SchemaPlan, - _outcome: &crate::workload::table_ops::TableWorkloadOutcome, - ) -> anyhow::Result<()> { - Ok(()) - } - - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { - if planner.active_writer() == Some(conn) { - planner.commit_tx(conn); - planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); - } else { - planner.begin_tx(conn); - planner.push_interaction(TableWorkloadInteraction::begin_tx(conn)); - } - } - } - - #[test] - fn lifecycle_interactions_wait_for_open_write_tx_to_close() { - 
let scenario = BeginThenCommitScenario; - let mut rng = DstSeed(1).rng(); - let schema = scenario.generate_schema(&mut rng); - let profile = CommitlogWorkloadProfile { - close_reopen_pct: 100, - snapshot_pct: 100, - create_dynamic_table_pct: 100, - migrate_after_create_pct: 100, - migrate_dynamic_table_pct: 100, - drop_dynamic_table_pct: 100, - }; - let mut source = CommitlogWorkloadSource::with_profile(DstSeed(10), scenario, schema, 1, 2, profile); - - assert!(matches!( - source.next(), - Some(CommitlogInteraction::Table(TableWorkloadInteraction { - op: TableOperation::BeginTx { .. }, - .. - })) - )); - assert!(matches!( - source.next(), - Some(CommitlogInteraction::Table(TableWorkloadInteraction { - op: TableOperation::CommitTx { .. }, - .. - })) - )); - assert!(matches!(source.next(), Some(CommitlogInteraction::CloseReopen))); - } -} diff --git a/crates/dst/src/workload/commitlog_ops/mod.rs b/crates/dst/src/workload/commitlog_ops/mod.rs deleted file mode 100644 index 62d0f99a82a..00000000000 --- a/crates/dst/src/workload/commitlog_ops/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -//! Commitlog-oriented workload that composes `table_ops` with lifecycle/chaos. - -mod generation; -mod types; - -pub(crate) use generation::CommitlogWorkloadSource; -pub use types::{ - CommitlogInteraction, CommitlogWorkloadOutcome, DiskFaultSummary, DurableReplaySummary, InteractionSummary, - RuntimeSummary, SchemaSummary, SnapshotCaptureStatus, SnapshotObservation, TableOperationSummary, - TransactionSummary, -}; diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs deleted file mode 100644 index 62711866eb4..00000000000 --- a/crates/dst/src/workload/commitlog_ops/types.rs +++ /dev/null @@ -1,169 +0,0 @@ -//! Serializable interaction model for relational-db + commitlog DST. 
- -use crate::{ - client::SessionId, - config::CommitlogFaultProfile, - schema::SimRow, - workload::table_ops::{TableWorkloadInteraction, TableWorkloadOutcome}, -}; - -/// One interaction in the commitlog-oriented mixed workload. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum CommitlogInteraction { - /// Reused base workload interaction from `table_ops`. - Table(TableWorkloadInteraction), - /// Create a dynamic user table for a logical slot. - CreateDynamicTable { conn: SessionId, slot: u32 }, - /// Drop a previously created dynamic user table. - DropDynamicTable { conn: SessionId, slot: u32 }, - /// Migrate dynamic table schema for a slot. - MigrateDynamicTable { conn: SessionId, slot: u32 }, - /// Close and restart the database from durable history. - CloseReopen, -} - -/// Successful run summary for commitlog target. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct CommitlogWorkloadOutcome { - pub applied_steps: usize, - pub durable_commit_count: usize, - pub replay_table_count: usize, - pub schema: SchemaSummary, - pub interactions: InteractionSummary, - pub table_ops: TableOperationSummary, - pub transactions: TransactionSummary, - pub runtime: RuntimeSummary, - pub disk_faults: DiskFaultSummary, - pub snapshot_faults: DiskFaultSummary, - pub replay: DurableReplaySummary, - pub table: TableWorkloadOutcome, -} - -/// State observed after opening a fresh database from durable commitlog history. -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct DurableReplaySummary { - pub durable_offset: Option, - pub restored_snapshot_offset: Option, - pub latest_snapshot_offset: Option, - pub base_rows: Vec>, - pub dynamic_table_count: usize, -} - -/// Snapshot capture status observed by a target. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum SnapshotCaptureStatus { - Captured { offset: u64 }, - SkippedOpenTransaction, - SkippedNoSnapshotCreated, - SkippedInjectedFault, -} - -/// Snapshot capture facts exposed to properties. 
-#[derive(Clone, Debug, Eq, PartialEq)] -pub struct SnapshotObservation { - pub durable_offset: Option, - pub latest_before: Option, - pub latest_after: Option, - pub status: SnapshotCaptureStatus, -} - -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct SchemaSummary { - pub initial_tables: usize, - pub initial_columns: usize, - pub max_columns_per_table: usize, - pub initial_indexes: usize, - pub extra_indexes: usize, -} - -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct InteractionSummary { - pub table: usize, - pub create_dynamic_table: usize, - pub drop_dynamic_table: usize, - pub migrate_dynamic_table: usize, - pub close_reopen_requested: usize, - pub close_reopen_applied: usize, - pub close_reopen_skipped: usize, - pub snapshot_requested: usize, - pub snapshot_created: usize, - pub snapshot_skipped: usize, - pub skipped: usize, -} - -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct TableOperationSummary { - /// Explicit write transaction starts. - pub begin_tx: usize, - /// Explicit write transaction commits. - pub commit_tx: usize, - /// Explicit write transaction rollbacks. - pub rollback_tx: usize, - /// Long read snapshot starts. - pub begin_read_tx: usize, - /// Long read snapshot releases. - pub release_read_tx: usize, - /// Expected failures when a second writer tries to begin. - pub begin_tx_conflict: usize, - /// Expected failures when a second writer tries to write. - pub write_conflict_insert: usize, - /// Fresh single-row inserts. - pub insert: usize, - /// Single-row deletes. - pub delete: usize, - /// Exact full-row reinserts that should be idempotent no-ops. - pub exact_duplicate_insert: usize, - /// Same primary id with different payload; should violate the unique key. - pub unique_key_conflict_insert: usize, - /// Deletes of absent rows that should report no mutation. - pub delete_missing: usize, - /// Multi-row inserts. - pub batch_insert: usize, - /// Multi-row deletes. 
- pub batch_delete: usize, - /// Delete followed by inserting the same row. - pub reinsert: usize, - /// Add-column schema changes against live base tables. - pub add_column: usize, - /// Add-index schema changes against live base tables. - pub add_index: usize, - /// Primary-id lookup oracle checks. - pub point_lookup: usize, - /// Column equality count oracle checks. - pub predicate_count: usize, - /// Indexed range scan oracle checks. - pub range_scan: usize, - /// Full scan oracle checks. - pub full_scan: usize, -} - -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct TransactionSummary { - pub explicit_begin: usize, - pub explicit_commit: usize, - pub explicit_rollback: usize, - pub auto_commit: usize, - pub read_tx: usize, - pub durable_commit_count: usize, -} - -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct RuntimeSummary { - pub known_runtime_tasks_scheduled: usize, - pub durability_actors_started: usize, - pub runtime_alive_tasks: Option, -} - -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct DiskFaultSummary { - pub profile: CommitlogFaultProfile, - pub latency: usize, - pub short_read: usize, - pub short_write: usize, - pub read_error: usize, - pub write_error: usize, - pub flush_error: usize, - pub fsync_error: usize, - pub open_error: usize, - pub metadata_error: usize, -} diff --git a/crates/dst/src/workload/mod.rs b/crates/dst/src/workload/mod.rs index 52482e737f1..faf3c04b5f2 100644 --- a/crates/dst/src/workload/mod.rs +++ b/crates/dst/src/workload/mod.rs @@ -1,5 +1,4 @@ //! Shared workload generators reused by multiple DST targets. -pub mod commitlog_ops; -pub(crate) mod strategy; pub mod table_ops; +pub(crate) mod strategy; diff --git a/crates/dst/src/workload/strategy.rs b/crates/dst/src/workload/strategy.rs index 94108eced8c..6c70ebb9e94 100644 --- a/crates/dst/src/workload/strategy.rs +++ b/crates/dst/src/workload/strategy.rs @@ -3,11 +3,11 @@ //! 
This is intentionally minimal: we keep DST's streaming execution model and //! use strategies only for typed, composable input generation. -use crate::seed::DstRng; +use crate::sim::Rng; /// Typed strategy that can sample values from the shared deterministic RNG. pub(crate) trait Strategy: Sized { - fn sample(&self, rng: &mut DstRng) -> T; + fn sample(&self, rng: &Rng) -> T; } /// Picks a value in `[0, upper)`. @@ -24,7 +24,7 @@ impl Index { } impl Strategy for Index { - fn sample(&self, rng: &mut DstRng) -> usize { + fn sample(&self, rng: &Rng) -> usize { rng.index(self.upper) } } @@ -43,7 +43,7 @@ impl Percent { } impl Strategy for Percent { - fn sample(&self, rng: &mut DstRng) -> bool { + fn sample(&self, rng: &Rng) -> bool { Index::new(100).sample(rng) < self.percent } } @@ -64,7 +64,7 @@ impl Weighted { } impl Strategy for Weighted { - fn sample(&self, rng: &mut DstRng) -> T { + fn sample(&self, rng: &Rng) -> T { let mut pick = Index::new(self.total_weight).sample(rng); for (weight, value) in &self.options { if pick < *weight { @@ -81,25 +81,25 @@ impl Strategy for Weighted { #[cfg(test)] mod tests { - use crate::seed::DstSeed; + use crate::sim::Rng; use super::{Index, Percent, Strategy, Weighted}; #[test] fn weighted_is_deterministic_for_seed() { let strategy = Weighted::new(vec![(1, 10usize), (2, 20usize), (3, 30usize)]); - let mut rng_a = DstSeed(7).rng(); - let mut rng_b = DstSeed(7).rng(); - let a = (0..16).map(|_| strategy.sample(&mut rng_a)).collect::>(); - let b = (0..16).map(|_| strategy.sample(&mut rng_b)).collect::>(); + let rng_a = Rng::new(7); + let rng_b = Rng::new(7); + let a = (0..16).map(|_| strategy.sample(&rng_a)).collect::>(); + let b = (0..16).map(|_| strategy.sample(&rng_b)).collect::>(); assert_eq!(a, b); } #[test] fn index_strategy_respects_bounds() { - let mut rng = DstSeed(123).rng(); + let rng = Rng::new(123); for _ in 0..64 { - let idx = Index::new(5).sample(&mut rng); + let idx = Index::new(5).sample(&rng); assert!(idx < 5); } } 
diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index dec276060b2..b6050fd8e18 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -4,7 +4,7 @@ use crate::{ client::SessionId, core::WorkloadSource, schema::{ColumnPlan, SchemaPlan, TablePlan}, - seed::{DstRng, DstSeed}, + sim::{fork_seed, Rng}, workload::strategy::{Index, Percent, Strategy}, }; @@ -21,22 +21,13 @@ use super::{ /// memory up front. #[derive(Clone, Debug)] pub struct TableWorkloadSource { - // Deterministic source for all planner choices. - rng: DstRng, - // Scenario-specific workload policy layered on top of the shared model. + rng: Rng, scenario: S, - // Generator-side model used to decide what interactions are legal. model: GenerationModel, num_connections: usize, - // Soft budget for scenario-generated interactions. Finish mode may emit a - // few extra commit/follow-up interactions to close open transactions. target_interactions: usize, emitted: usize, - // When the budget is exhausted, we walk connections in order and commit any - // still-open transaction so the stream ends in a clean state. finalize_conn: usize, - // Scenario code can enqueue a burst of interactions at once: for example a - // mutation followed by one or more property checks. pending: VecDeque, finished: bool, } @@ -45,7 +36,7 @@ pub struct TableWorkloadSource { /// inspect the current model and enqueue interactions without owning the whole /// stream state machine. pub struct ScenarioPlanner<'a> { - rng: &'a mut DstRng, + rng: &'a Rng, model: &'a mut GenerationModel, pending: &'a mut VecDeque, } @@ -98,10 +89,6 @@ impl<'a> ScenarioPlanner<'a> { self.model.rollback(conn); } - /// Tries to emit one transaction control interaction for `conn`. - /// - /// The shared generator owns transaction lifecycle so scenario code can - /// focus on domain operations like inserts, deletes, and range checks. 
pub fn maybe_control_tx( &mut self, conn: SessionId, @@ -197,14 +184,14 @@ impl<'a> ScenarioPlanner<'a> { impl TableWorkloadSource { pub fn new( - seed: DstSeed, + seed: u64, scenario: S, schema: SchemaPlan, num_connections: usize, target_interactions: usize, ) -> Self { Self { - rng: seed.fork(17).rng(), + rng: Rng::new(fork_seed(seed, 17)), scenario, model: GenerationModel::new(&schema, num_connections, seed), num_connections, @@ -220,18 +207,18 @@ impl TableWorkloadSource { self.target_interactions = self.emitted; } + #[allow(dead_code)] pub fn has_open_read_tx(&self) -> bool { self.model.any_read_tx() } + #[allow(dead_code)] pub fn has_open_write_tx(&self) -> bool { self.model.active_writer().is_some() } fn fill_pending(&mut self) { if self.emitted >= self.target_interactions { - // Once the workload budget is spent, stop asking the scenario for - // more work and only flush any open transaction state. while self.finalize_conn < self.num_connections { let conn = SessionId::from_index(self.finalize_conn); self.finalize_conn += 1; @@ -250,16 +237,12 @@ impl TableWorkloadSource { return; } - // Transactions stay open across interactions, but each API call is a - // separate synchronous step. Always choose a connection uniformly so - // later steps can naturally observe lock contention instead of the - // planner steering around open readers or writers. let conn = ConnectionChoice { connection_count: self.num_connections, } - .sample(&mut self.rng); + .sample(&self.rng); let mut planner = ScenarioPlanner { - rng: &mut self.rng, + rng: &self.rng, model: &mut self.model, pending: &mut self.pending, }; @@ -270,8 +253,6 @@ impl TableWorkloadSource { impl TableWorkloadSource { pub fn pull_next_interaction(&mut self) -> Option { loop { - // Scenario planning fills `pending` in bursts, but the iterator - // surface stays one interaction at a time. 
if let Some(interaction) = self.pending.pop_front() { self.emitted += 1; return Some(interaction); diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs index f75470bf56a..facf8a92734 100644 --- a/crates/dst/src/workload/table_ops/mod.rs +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -6,8 +6,6 @@ mod scenarios; pub(crate) mod strategies; mod types; -#[cfg(test)] -pub(crate) use generation::ScenarioPlanner; pub(crate) use generation::TableWorkloadSource; pub(crate) use model::{PredictedOutcome, TableOracle}; pub use scenarios::TableScenarioId; diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs index 0b498c3ef13..f56b1db5a25 100644 --- a/crates/dst/src/workload/table_ops/model.rs +++ b/crates/dst/src/workload/table_ops/model.rs @@ -5,7 +5,7 @@ use spacetimedb_sats::AlgebraicValue; use crate::{ client::SessionId, schema::{distinct_value_for_type, generate_value_for_type, ColumnPlan, SchemaPlan, SimRow}, - seed::{DstRng, DstSeed}, + sim::{fork_seed, Rng}, }; use super::{TableErrorKind, TableOperation}; @@ -33,19 +33,19 @@ pub(crate) struct PendingConnection { } impl GenerationModel { - pub(crate) fn new(schema: &SchemaPlan, num_connections: usize, seed: DstSeed) -> Self { + pub(crate) fn new(schema: &SchemaPlan, num_connections: usize, seed: u64) -> Self { Self { schema: schema.clone(), connections: vec![PendingConnection::default(); num_connections], committed: vec![Vec::new(); schema.tables.len()], next_ids: (0..schema.tables.len()) - .map(|idx| seed.fork(idx as u64 + 100).0) + .map(|idx| fork_seed(seed, idx as u64 + 100)) .collect(), active_writer: None, } } - pub(crate) fn make_row(&mut self, rng: &mut DstRng, table: usize) -> SimRow { + pub(crate) fn make_row(&mut self, rng: &Rng, table: usize) -> SimRow { let table_plan = &self.schema.tables[table]; let id = self.next_ids[table]; self.next_ids[table] = self.next_ids[table].wrapping_add(1).max(1); @@ -76,7 +76,7 @@ impl 
GenerationModel { rows } - pub(crate) fn absent_row(&mut self, rng: &mut DstRng, conn: SessionId, table: usize) -> SimRow { + pub(crate) fn absent_row(&mut self, rng: &Rng, conn: SessionId, table: usize) -> SimRow { let mut row = self.make_row(rng, table); while self.visible_rows(conn, table).iter().any(|candidate| candidate == &row) { row = self.make_row(rng, table); @@ -84,7 +84,7 @@ impl GenerationModel { row } - pub(crate) fn unique_key_conflict_row(&self, rng: &mut DstRng, table: usize, source: &SimRow) -> Option { + pub(crate) fn unique_key_conflict_row(&self, rng: &Rng, table: usize, source: &SimRow) -> Option { let table_plan = &self.schema.tables[table]; let value_count = source.values.len().min(table_plan.columns.len()); if value_count <= 1 { diff --git a/crates/dst/src/workload/table_ops/scenarios/banking.rs b/crates/dst/src/workload/table_ops/scenarios/banking.rs deleted file mode 100644 index 534f8ca504c..00000000000 --- a/crates/dst/src/workload/table_ops/scenarios/banking.rs +++ /dev/null @@ -1,108 +0,0 @@ -use spacetimedb_sats::AlgebraicType; - -use crate::{ - client::SessionId, - schema::{ColumnPlan, SchemaPlan, TablePlan}, -}; - -use super::super::{generation::ScenarioPlanner, TableWorkloadInteraction, TableWorkloadOutcome}; - -pub fn generate_schema() -> SchemaPlan { - SchemaPlan { - tables: vec![ - TablePlan { - name: "debit_accounts".into(), - columns: vec![ - ColumnPlan { - name: "id".into(), - ty: AlgebraicType::U64, - }, - ColumnPlan { - name: "balance".into(), - ty: AlgebraicType::U64, - }, - ], - extra_indexes: vec![vec![1]], - }, - TablePlan { - name: "credit_accounts".into(), - columns: vec![ - ColumnPlan { - name: "id".into(), - ty: AlgebraicType::U64, - }, - ColumnPlan { - name: "balance".into(), - ty: AlgebraicType::U64, - }, - ], - extra_indexes: vec![vec![1]], - }, - ], - } -} - -pub fn validate_outcome(schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { - let debit_idx = schema - .tables - .iter() - 
.position(|table| table.name == "debit_accounts") - .ok_or_else(|| anyhow::anyhow!("missing debit_accounts table"))?; - let credit_idx = schema - .tables - .iter() - .position(|table| table.name == "credit_accounts") - .ok_or_else(|| anyhow::anyhow!("missing credit_accounts table"))?; - - let debit_rows = outcome - .final_rows - .get(debit_idx) - .ok_or_else(|| anyhow::anyhow!("missing debit_accounts rows"))?; - let credit_rows = outcome - .final_rows - .get(credit_idx) - .ok_or_else(|| anyhow::anyhow!("missing credit_accounts rows"))?; - - if debit_rows != credit_rows { - anyhow::bail!("banking tables diverged: debit={debit_rows:?} credit={credit_rows:?}"); - } - Ok(()) -} - -pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { - if planner.maybe_control_tx(conn, 25, 20, 10) { - return; - } - - let debit_rows = planner.visible_rows(conn, 0); - let choose_insert = debit_rows.is_empty() || planner.roll_percent(65); - let wrap_pair_in_tx = planner.active_writer().is_none(); - if wrap_pair_in_tx { - planner.begin_tx(conn); - planner.push_interaction(TableWorkloadInteraction::begin_tx(conn)); - } - if choose_insert { - let row = planner.make_row(0); - let mirror = row.clone(); - planner.insert(conn, 0, row.clone()); - planner.insert(conn, 1, mirror.clone()); - planner.push_interaction(TableWorkloadInteraction::insert(conn, 0, row.clone())); - planner.push_interaction(TableWorkloadInteraction::insert(conn, 1, mirror.clone())); - if wrap_pair_in_tx { - planner.commit_tx(conn); - planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); - } - return; - } - - let row = debit_rows[planner.choose_index(debit_rows.len())].clone(); - let mirror = row.clone(); - planner.delete(conn, 0, row.clone()); - planner.delete(conn, 1, mirror.clone()); - planner.push_interaction(TableWorkloadInteraction::delete(conn, 0, row.clone())); - planner.push_interaction(TableWorkloadInteraction::delete(conn, 1, mirror.clone())); - if wrap_pair_in_tx { - 
planner.commit_tx(conn); - planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); - } -} diff --git a/crates/dst/src/workload/table_ops/scenarios/mod.rs b/crates/dst/src/workload/table_ops/scenarios/mod.rs index ac024a87655..4619473dc36 100644 --- a/crates/dst/src/workload/table_ops/scenarios/mod.rs +++ b/crates/dst/src/workload/table_ops/scenarios/mod.rs @@ -1,29 +1,20 @@ -mod banking; mod random_crud; -use crate::{client::SessionId, schema::SchemaPlan, seed::DstRng}; +use crate::{client::SessionId, schema::SchemaPlan, sim::Rng}; use super::{generation::ScenarioPlanner, TableScenario, TableWorkloadOutcome}; #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub(crate) struct RandomCrudScenario; -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub(crate) struct IndexedRangesScenario; - -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub(crate) struct BankingScenario; - #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub enum TableScenarioId { #[default] RandomCrud, - IndexedRanges, - Banking, } impl TableScenario for RandomCrudScenario { - fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan { + fn generate_schema(&self, rng: &Rng) -> SchemaPlan { random_crud::generate_schema(rng) } @@ -36,56 +27,22 @@ impl TableScenario for RandomCrudScenario { } } -impl TableScenario for BankingScenario { - fn generate_schema(&self, _rng: &mut DstRng) -> SchemaPlan { - banking::generate_schema() - } - - fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { - banking::validate_outcome(schema, outcome) - } - - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { - banking::fill_pending(planner, conn); - } -} - -impl TableScenario for IndexedRangesScenario { - fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan { - random_crud::generate_indexed_ranges_schema(rng) - } - - fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> 
anyhow::Result<()> { - random_crud::validate_outcome(schema, outcome) - } - - fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { - random_crud::fill_pending_indexed_ranges(planner, conn); - } -} - impl TableScenario for TableScenarioId { - fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan { + fn generate_schema(&self, rng: &Rng) -> SchemaPlan { match self { Self::RandomCrud => RandomCrudScenario.generate_schema(rng), - Self::IndexedRanges => IndexedRangesScenario.generate_schema(rng), - Self::Banking => BankingScenario.generate_schema(rng), } } fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { match self { Self::RandomCrud => RandomCrudScenario.validate_outcome(schema, outcome), - Self::IndexedRanges => IndexedRangesScenario.validate_outcome(schema, outcome), - Self::Banking => BankingScenario.validate_outcome(schema, outcome), } } fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { match self { Self::RandomCrud => RandomCrudScenario.fill_pending(planner, conn), - Self::IndexedRanges => IndexedRangesScenario.fill_pending(planner, conn), - Self::Banking => BankingScenario.fill_pending(planner, conn), } } } diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs index 49c96f150a9..5864592e0e6 100644 --- a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -5,7 +5,7 @@ use spacetimedb_sats::AlgebraicType; use crate::{ client::SessionId, schema::{default_value_for_type, generate_supported_type, ColumnPlan, SchemaPlan, SimRow, TablePlan}, - seed::DstRng, + sim::Rng, workload::strategy::{Index, Percent, Strategy}, }; @@ -60,39 +60,11 @@ const RANDOM_CRUD_PROFILE: TableWorkloadProfile = TableWorkloadProfile { add_index_pct: 2, }; -const INDEXED_RANGES_PROFILE: TableWorkloadProfile = TableWorkloadProfile { - 
min_tables: 2, - table_count_choices: 2, - min_extra_cols: 3, - extra_col_choices: 3, - preferred_range_cols: 3, - prefer_range_compatible_pct: 90, - prefer_u64_pct: 90, - single_index_pct: 100, - composite2_index_pct: 100, - composite3_index_pct: 75, - insert_pct: 55, - begin_tx_pct: 20, - commit_tx_pct: 15, - rollback_tx_pct: 8, - begin_read_tx_pct: 6, - release_read_tx_pct: 30, - empty_tx_pct: 2, - exact_duplicate_insert_pct: 3, - unique_key_conflict_insert_pct: 4, - add_column_pct: 2, - add_index_pct: 4, -}; - -pub fn generate_schema(rng: &mut DstRng) -> SchemaPlan { +pub fn generate_schema(rng: &Rng) -> SchemaPlan { generate_schema_with_profile(rng, RANDOM_CRUD_PROFILE) } -pub fn generate_indexed_ranges_schema(rng: &mut DstRng) -> SchemaPlan { - generate_schema_with_profile(rng, INDEXED_RANGES_PROFILE) -} - -fn generate_schema_with_profile(rng: &mut DstRng, profile: TableWorkloadProfile) -> SchemaPlan { +fn generate_schema_with_profile(rng: &Rng, profile: TableWorkloadProfile) -> SchemaPlan { let table_count = profile.min_tables + Index::new(profile.table_count_choices).sample(rng); let mut tables = Vec::with_capacity(table_count); @@ -158,10 +130,6 @@ pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { fill_pending_with_profile(planner, conn, RANDOM_CRUD_PROFILE); } -pub fn fill_pending_indexed_ranges(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { - fill_pending_with_profile(planner, conn, INDEXED_RANGES_PROFILE); -} - fn fill_pending_with_profile(planner: &mut ScenarioPlanner<'_>, conn: SessionId, profile: TableWorkloadProfile) { if planner.has_read_tx(conn) { let table = planner.choose_table(); diff --git a/crates/dst/src/workload/table_ops/strategies.rs b/crates/dst/src/workload/table_ops/strategies.rs index 13d04d2054c..42dbc6c2ee4 100644 --- a/crates/dst/src/workload/table_ops/strategies.rs +++ b/crates/dst/src/workload/table_ops/strategies.rs @@ -2,7 +2,7 @@ use crate::{ client::SessionId, - seed::DstRng, + sim::Rng, 
workload::strategy::{Index, Strategy, Weighted}, }; @@ -13,7 +13,7 @@ pub(crate) struct ConnectionChoice { } impl Strategy for ConnectionChoice { - fn sample(&self, rng: &mut DstRng) -> SessionId { + fn sample(&self, rng: &Rng) -> SessionId { SessionId::from_index(Index::new(self.connection_count).sample(rng)) } } @@ -25,7 +25,7 @@ pub(crate) struct TableChoice { } impl Strategy for TableChoice { - fn sample(&self, rng: &mut DstRng) -> usize { + fn sample(&self, rng: &Rng) -> usize { Index::new(self.table_count).sample(rng) } } @@ -48,7 +48,7 @@ pub(crate) struct TxControlChoice { } impl Strategy for TxControlChoice { - fn sample(&self, rng: &mut DstRng) -> TxControlAction { + fn sample(&self, rng: &Rng) -> TxControlAction { let begin = self.begin_pct.min(100); let commit = self.commit_pct.min(100); let rollback = self.rollback_pct.min(100); diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index 96947a509bc..6b589b0cdaf 100644 --- a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -5,7 +5,7 @@ use spacetimedb_sats::AlgebraicValue; use crate::{ client::SessionId, schema::{ColumnPlan, SchemaPlan, SimRow}, - seed::DstRng, + sim::Rng, }; use super::generation::ScenarioPlanner; @@ -15,7 +15,7 @@ use super::generation::ScenarioPlanner; /// A scenario supplies the initial schema, scenario-specific commit-time /// properties, and any final invariant over the collected outcome. 
pub(crate) trait TableScenario: Clone { - fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan; + fn generate_schema(&self, rng: &Rng) -> SchemaPlan; fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()>; fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId); } From b3a08c4fb613a543ba6f62626b6b101267128eec Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 15 Jun 2026 16:02:30 +0530 Subject: [PATCH 74/74] sync --- crates/dst/Cargo.toml | 4 +- crates/dst/README.md | 27 +- crates/dst/src/config.rs | 67 ++- crates/dst/src/core/mod.rs | 147 +++++- crates/dst/src/main.rs | 87 +++- crates/dst/src/properties.rs | 31 +- crates/dst/src/properties/rules.rs | 85 +++- crates/dst/src/properties/runtime.rs | 31 +- crates/dst/src/schema.rs | 3 + crates/dst/src/sim/commitlog.rs | 15 +- crates/dst/src/sim/mod.rs | 1 + crates/dst/src/sim/snapshot.rs | 10 +- crates/dst/src/sim/storage_faults.rs | 22 +- crates/dst/src/sim/time.rs | 19 +- crates/dst/src/targets/descriptor.rs | 20 +- .../src/targets/relational_db_commitlog.rs | 427 ++++++++++++++---- crates/dst/src/workload/mod.rs | 3 +- .../dst/src/workload/table_ops/generation.rs | 21 +- crates/dst/src/workload/table_ops/model.rs | 107 ++++- .../src/workload/table_ops/scenarios/mod.rs | 21 + .../table_ops/scenarios/random_crud.rs | 55 +++ crates/dst/src/workload/table_ops/types.rs | 69 ++- crates/engine/src/metrics.rs | 6 + crates/runtime/src/lib.rs | 5 - 24 files changed, 1041 insertions(+), 242 deletions(-) diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml index c3e2b3ea519..d42e5850884 100644 --- a/crates/dst/Cargo.toml +++ b/crates/dst/Cargo.toml @@ -19,9 +19,9 @@ anyhow.workspace = true clap.workspace = true futures-util.workspace = true spacetimedb-datastore = { workspace = true, features = ["test"] } -spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.2.0", features = ["test"] } +spacetimedb_core = { package 
= "spacetimedb-core", path = "../core", features = ["test"] } spacetimedb-commitlog = { workspace = true, features = ["test"] } -spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.2.0", features = ["test"] } +spacetimedb-durability = { workspace = true, features = ["test"] } spacetimedb-lib.workspace = true spacetimedb-snapshot.workspace = true spacetimedb-primitives.workspace = true diff --git a/crates/dst/README.md b/crates/dst/README.md index e9c756a5646..32de778f2ee 100644 --- a/crates/dst/README.md +++ b/crates/dst/README.md @@ -21,6 +21,8 @@ in progress. - Generation, execution, and property checking stay separate so failures are diagnosable as workload bugs, target bugs, or weak assertions. - Runs stream interactions instead of materializing a full plan by default. +- Properties stream target state through accessors instead of storing full-scan + observations or final row snapshots. - Fault injection is explicit, configurable, and summarized in the outcome. - Shared probability and weighting logic belongs in `workload::strategy`, not ad hoc scenario code. @@ -39,9 +41,11 @@ CLI -> TargetDescriptor -> WorkloadSource -> TargetEngine -> Observation The core contracts are: - `WorkloadSource`: deterministic pull-based interaction stream. -- `TargetEngine`: target-specific execution and outcome collection. +- `TargetEngine`: target-specific execution and cheap outcome summaries. - `StreamingProperties`: reusable property checks over observations and target accessors. +- `TargetPropertyAccess`: streamed target state access for checks that need + table rows; observations and outcomes should not carry full-table copies. ## Client Model @@ -135,17 +139,15 @@ Current property families include: ## Fault Injection `relational-db-commitlog` can wrap the in-memory commitlog repo in -`BuggifiedRepo`. Fault decisions are deterministic from the run seed and +`FaultableRepo`. 
Fault decisions are deterministic from the run seed and summarized in the final outcome. Profiles: - `off`: no injected disk behavior. -- `light`: latency and occasional short I/O. -- `default`: stronger latency and short I/O pressure. -- `aggressive`: higher latency and short I/O rates. I/O error hooks exist but - are currently disabled in profile-driven runs because local durability does - not yet classify those errors as recoverable target outcomes. +- `light`: low-probability latency, short I/O, and transient storage errors. +- `default`: moderate-probability latency, short I/O, and transient storage errors. +- `aggressive`: high-probability latency, short I/O, and transient storage errors. ## Running @@ -155,11 +157,10 @@ Fast local run: cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --seed 42 --max-interactions 200 ``` -Scenario examples: +Scenario example: ```bash -cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario banking --duration 5m -cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario indexed-ranges --duration 5m +cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario random-crud --duration 5m ``` Run with commitlog faults: @@ -180,6 +181,10 @@ RUST_LOG=trace cargo run -p spacetimedb-dst -- run --target relational-db-commit ## Run Budgets +`--harness-phase-timeout` is a virtual-time watchdog for one harness phase +(`execute_interaction`, `finish`, or `collect_outcome`). It defaults to `30s` and +can be disabled with `off`, but disabling it makes the harness deadlock-prone. + Prefer `--max-interactions` when reporting or replaying a failure. It is the deterministic interaction budget, so target, scenario, seed, interaction count, and fault profile are enough to rerun the same generated stream. @@ -200,7 +205,7 @@ Start here: - `src/properties.rs`: property catalog and oracle/model checks. 
- `src/targets/relational_db_commitlog.rs`: target adapter for RelationalDB, commitlog durability, fault injection, close/reopen, and replay. -- `src/targets/buggified_repo.rs`: deterministic disk-like fault layer. +- `src/sim/commitlog.rs`: deterministic disk-like commitlog fault layer. ## Adding A New Target diff --git a/crates/dst/src/config.rs b/crates/dst/src/config.rs index 1f37e217fb8..fd13c06fd62 100644 --- a/crates/dst/src/config.rs +++ b/crates/dst/src/config.rs @@ -1,13 +1,13 @@ //! Shared run-budget configuration for DST targets. -use std::time::{Duration, Instant}; +use std::{ + fmt, + time::{Duration, Instant}, +}; /// Storage fault-injection profile for commitlog and snapshot wrappers. -/// -/// These are not CLI options yet; they are programmatic knobs for targeted -/// fault-injection tests. #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub(crate) enum CommitlogFaultProfile { +pub enum CommitlogFaultProfile { /// No faults injected regardless of buggify state. Off, /// Low probability latency and short I/O only. 
@@ -19,7 +19,54 @@ pub(crate) enum CommitlogFaultProfile { Aggressive, } +impl CommitlogFaultProfile { + pub fn parse(value: &str) -> anyhow::Result { + match value { + "off" => Ok(Self::Off), + "light" => Ok(Self::Light), + "default" => Ok(Self::Default), + "aggressive" => Ok(Self::Aggressive), + _ => anyhow::bail!( + "unsupported commitlog fault profile: {value}; expected one of: off, light, default, aggressive" + ), + } + } + + pub const fn as_str(self) -> &'static str { + match self { + Self::Off => "off", + Self::Light => "light", + Self::Default => "default", + Self::Aggressive => "aggressive", + } + } +} + +impl fmt::Display for CommitlogFaultProfile { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct StorageFaultSummary { + pub profile: CommitlogFaultProfile, + pub latency: usize, + pub short_read: usize, + pub short_write: usize, + pub read_error: usize, + pub write_error: usize, + pub flush_error: usize, + pub fsync_error: usize, + pub open_error: usize, + pub metadata_error: usize, + pub no_space: usize, + pub partial_failure: usize, +} + /// Common stop conditions for generated DST runs. +pub const DEFAULT_HARNESS_PHASE_TIMEOUT_MS: u64 = 30_000; + #[derive(Clone, Debug, Eq, PartialEq)] pub struct RunConfig { /// Hard cap on generated interactions. `None` means no interaction budget. @@ -34,6 +81,10 @@ pub struct RunConfig { /// with host speed and runtime behavior. Use `max_interactions` when a /// failure needs precise replay. pub max_duration_ms: Option, + /// Virtual-time watchdog for each target execution and collection phase. + /// `None` disables the watchdog. 
+ pub harness_phase_timeout_ms: Option, + pub commitlog_fault_profile: CommitlogFaultProfile, } impl Default for RunConfig { @@ -41,6 +92,8 @@ impl Default for RunConfig { Self { max_interactions: None, max_duration_ms: None, + harness_phase_timeout_ms: Some(DEFAULT_HARNESS_PHASE_TIMEOUT_MS), + commitlog_fault_profile: CommitlogFaultProfile::Default, } } } @@ -50,6 +103,8 @@ impl RunConfig { Self { max_interactions: Some(max_interactions), max_duration_ms: None, + harness_phase_timeout_ms: Some(DEFAULT_HARNESS_PHASE_TIMEOUT_MS), + commitlog_fault_profile: CommitlogFaultProfile::Default, } } @@ -57,6 +112,8 @@ impl RunConfig { Ok(Self { max_interactions: None, max_duration_ms: Some(parse_duration_spec(duration)?.as_millis() as u64), + harness_phase_timeout_ms: Some(DEFAULT_HARNESS_PHASE_TIMEOUT_MS), + commitlog_fault_profile: CommitlogFaultProfile::Default, }) } diff --git a/crates/dst/src/core/mod.rs b/crates/dst/src/core/mod.rs index 400c132a35f..deb4da25d73 100644 --- a/crates/dst/src/core/mod.rs +++ b/crates/dst/src/core/mod.rs @@ -5,6 +5,7 @@ use std::{ fmt::Debug, future::Future, panic::{self, AssertUnwindSafe}, + time::Duration, }; use crate::config::RunConfig; @@ -32,6 +33,19 @@ pub trait TargetEngine { fn collect_outcome<'a>(&'a mut self) -> impl Future> + 'a; } +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub struct RunStats { + pub interactions_executed: usize, +} + +pub trait RunOutcome { + fn record_run_stats(&mut self, stats: RunStats); +} + +impl RunOutcome for () { + fn record_run_stats(&mut self, _stats: RunStats) {} +} + /// Property runtime contract for the shared streaming runner. 
pub trait StreamingProperties where @@ -52,39 +66,53 @@ where I: Clone + Debug, S: WorkloadSource, E: TargetEngine, + E::Outcome: RunOutcome, P: StreamingProperties, { let deadline = cfg.deadline(); + let phase_timeout = cfg.harness_phase_timeout_ms.map(Duration::from_millis); let mut step = 0usize; loop { + if cfg.max_interactions.is_some_and(|max| step >= max) { + break; + } if deadline.is_some_and(|d| std::time::Instant::now() >= d) { source.request_finish(); } let Some(interaction) = source.next_interaction() else { break; }; - let execution = guard_target("execute_interaction", step, Some(&interaction), || { + let execution = guard_target("execute_interaction", step, Some(&interaction), phase_timeout, || { engine.execute_interaction(&interaction) }) .await .map_err(|e| anyhow::anyhow!("property violation at step {step}: {e}"))?; let observation = execution.map_err(|e| anyhow::anyhow!("interaction execution failed at step {step}: {e}"))?; - properties - .observe(&engine, &interaction, &observation) - .map_err(|e| anyhow::anyhow!("property violation at step {step}: {e}"))?; + let property_result = guard_sync("properties.observe", step, Some(&interaction), || { + properties.observe(&engine, &interaction, &observation) + }) + .map_err(|e| anyhow::anyhow!("property violation at step {step}: {e}"))?; + property_result.map_err(|e| anyhow::anyhow!("property violation at step {step}: {e}"))?; step = step.saturating_add(1); } - guard_target("finish", step, Option::<&I>::None, || async { + guard_target("finish", step, Option::<&I>::None, phase_timeout, || async { engine.finish(); }) .await .map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?; - let outcome = guard_target("collect_outcome", step, Option::<&I>::None, || engine.collect_outcome()) - .await - .map_err(|e| anyhow::anyhow!("property violation while collecting outcome: {e}"))??; - properties - .finish(&engine, &outcome) - .map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?; + let 
mut outcome = guard_target("collect_outcome", step, Option::<&I>::None, phase_timeout, || { + engine.collect_outcome() + }) + .await + .map_err(|e| anyhow::anyhow!("property violation while collecting outcome: {e}"))??; + outcome.record_run_stats(RunStats { + interactions_executed: step, + }); + let property_result = guard_sync("properties.finish", step, Option::<&I>::None, || { + properties.finish(&engine, &outcome) + }) + .map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?; + property_result.map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?; Ok(outcome) } @@ -92,6 +120,7 @@ async fn guard_target( phase: &'static str, step: usize, interaction: Option<&I>, + timeout: Option, make_future: impl FnOnce() -> Fut, ) -> Result where @@ -100,10 +129,39 @@ where { let future = panic::catch_unwind(AssertUnwindSafe(make_future)) .map_err(|payload| not_crash_error(phase, step, interaction, &payload))?; - AssertUnwindSafe(future) - .catch_unwind() - .await - .map_err(|payload| not_crash_error(phase, step, interaction, &payload)) + let guarded = AssertUnwindSafe(future).catch_unwind(); + + match timeout { + Some(timeout) => match crate::sim::time::timeout(timeout, guarded).await { + Ok(Ok(value)) => Ok(value), + Ok(Err(payload)) => Err(not_crash_error(phase, step, interaction, &payload)), + Err(elapsed) => Err(timeout_error(phase, step, interaction, elapsed.duration())), + }, + None => guarded + .await + .map_err(|payload| not_crash_error(phase, step, interaction, &payload)), + } +} + +fn guard_sync( + phase: &'static str, + step: usize, + interaction: Option<&I>, + f: impl FnOnce() -> T, +) -> Result +where + I: Debug, +{ + panic::catch_unwind(AssertUnwindSafe(f)).map_err(|payload| not_crash_error(phase, step, interaction, &payload)) +} + +fn timeout_error(phase: &'static str, step: usize, interaction: Option<&I>, timeout: Duration) -> String { + match interaction { + Some(interaction) => format!( + "[Timeout] target did not complete {phase} 
within {timeout:?} at step {step}: interaction={interaction:?}" + ), + None => format!("[Timeout] target did not complete {phase} within {timeout:?} after step {step}"), + } } fn not_crash_error( @@ -174,12 +232,15 @@ mod tests { phase: PanicPhase, } + struct PendingEngine; + impl PanicEngine { fn new(phase: PanicPhase) -> Self { Self { phase } } } + #[allow(clippy::manual_async_fn)] impl TargetEngine for PanicEngine { type Observation = (); type Outcome = (); @@ -213,19 +274,37 @@ mod tests { } } + #[allow(clippy::manual_async_fn)] + impl TargetEngine for PendingEngine { + type Observation = (); + type Outcome = (); + type Error = String; + + fn execute_interaction<'a>( + &'a mut self, + _interaction: &'a TestInteraction, + ) -> impl Future> + 'a { + futures_util::future::pending() + } + + fn finish(&mut self) {} + + fn collect_outcome<'a>(&'a mut self) -> impl Future> + 'a { + async move { Ok(()) } + } + } + struct NoopProperties; - impl StreamingProperties for NoopProperties { - fn observe( - &mut self, - _engine: &PanicEngine, - _interaction: &TestInteraction, - _observation: &(), - ) -> Result<(), String> { + impl StreamingProperties for NoopProperties + where + E: TargetEngine, + { + fn observe(&mut self, _engine: &E, _interaction: &TestInteraction, _observation: &()) -> Result<(), String> { Ok(()) } - fn finish(&mut self, _engine: &PanicEngine, _outcome: &()) -> Result<(), String> { + fn finish(&mut self, _engine: &E, _outcome: &()) -> Result<(), String> { Ok(()) } } @@ -245,6 +324,28 @@ mod tests { assert_not_crash_error(PanicPhase::CollectOutcome, "collect_outcome", "collect panic"); } + #[test] + fn target_timeout_reports_stalled_interaction() { + let mut runtime = crate::sim::Runtime::new(0).expect("runtime"); + let err = runtime + .block_on(run_streaming( + SingleStepSource::new(), + PendingEngine, + NoopProperties, + RunConfig { + max_interactions: Some(1), + max_duration_ms: None, + harness_phase_timeout_ms: Some(1), + commitlog_fault_profile: 
crate::config::CommitlogFaultProfile::Off, + }, + )) + .unwrap_err() + .to_string(); + + assert!(err.contains("[Timeout]")); + assert!(err.contains("execute_interaction")); + } + fn assert_not_crash_error(phase: PanicPhase, expected_phase: &str, expected_payload: &str) { let mut runtime = crate::sim::Runtime::new(0).expect("runtime"); let err = runtime diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs index b957c4fb0c4..d69a079307a 100644 --- a/crates/dst/src/main.rs +++ b/crates/dst/src/main.rs @@ -2,7 +2,7 @@ use std::time::{SystemTime, UNIX_EPOCH}; use clap::{Args, Parser, Subcommand}; use spacetimedb_dst::{ - config::RunConfig, + config::{CommitlogFaultProfile, RunConfig}, targets::descriptor::{RelationalDbCommitlogDescriptor, TargetDescriptor}, workload::table_ops::TableScenarioId, }; @@ -22,6 +22,8 @@ enum Command { #[derive(Args, Debug)] struct RunArgs { + #[arg(long, default_value = RelationalDbCommitlogDescriptor::NAME, help = "Target to run.")] + target: String, #[arg(long, help = "Seed for generated choices. Defaults to wall-clock time.")] seed: Option, #[arg( @@ -33,6 +35,18 @@ struct RunArgs { max_interactions: Option, #[arg(long, help = "Scenario to run [default: random-crud]")] scenario: Option, + #[arg( + long, + default_value = "default", + help = "Commitlog fault profile: off, light, default, or aggressive." + )] + commitlog_fault_profile: String, + #[arg( + long, + default_value = "30s", + help = "Virtual-time watchdog for one harness phase, such as 500ms, 30s, or off." 
+ )] + harness_phase_timeout: String, } fn main() -> anyhow::Result<()> { @@ -56,20 +70,24 @@ fn init_tracing() { } fn run_command(args: RunArgs) -> anyhow::Result<()> { + resolve_target(&args.target)?; let seed = resolve_seed(args.seed); - let config = build_config(args.duration.as_deref(), args.max_interactions)?; - let scenario = resolve_scenario(args.scenario.as_deref()); + let profile = CommitlogFaultProfile::parse(&args.commitlog_fault_profile)?; + let phase_timeout_ms = parse_optional_duration_spec(&args.harness_phase_timeout)?; + let config = build_config( + args.duration.as_deref(), + args.max_interactions, + profile, + phase_timeout_ms, + )?; + let scenario = resolve_scenario(args.scenario.as_deref())?; run_prepared_target::(seed, scenario, config) } -fn run_prepared_target( - seed: u64, - scenario: D::Scenario, - config: RunConfig, -) -> anyhow::Result<()> +fn run_prepared_target(seed: u64, scenario: D::Scenario, config: RunConfig) -> anyhow::Result<()> where - D: 'static, + D: TargetDescriptor + 'static, D::Scenario: Send + 'static, { D::prepare(seed, &scenario, &config)?; @@ -90,34 +108,57 @@ fn resolve_seed(seed: Option) -> u64 { }) } -fn resolve_scenario(scenario: Option<&str>) -> TableScenarioId { +fn resolve_target(target: &str) -> anyhow::Result<()> { + if target == RelationalDbCommitlogDescriptor::NAME { + Ok(()) + } else { + anyhow::bail!( + "unsupported target: {target}; expected: {}", + RelationalDbCommitlogDescriptor::NAME + ) + } +} + +fn resolve_scenario(scenario: Option<&str>) -> anyhow::Result { match scenario { - Some("random-crud") | None => TableScenarioId::RandomCrud, - Some(other) => { - eprintln!("unknown scenario: {other}, using random-crud"); - TableScenarioId::RandomCrud - } + Some(value) => TableScenarioId::parse(value), + None => Ok(TableScenarioId::default()), } } -fn build_config(duration: Option<&str>, max_interactions: Option) -> anyhow::Result { - Ok(match (duration, max_interactions) { +fn 
parse_optional_duration_spec(spec: &str) -> anyhow::Result> { + match spec { + "off" | "none" => Ok(None), + _ => Ok(Some( + spacetimedb_dst::config::parse_duration_spec(spec)?.as_millis() as u64 + )), + } +} + +fn build_config( + duration: Option<&str>, + max_interactions: Option, + commitlog_fault_profile: CommitlogFaultProfile, + harness_phase_timeout_ms: Option, +) -> anyhow::Result { + let mut config = match (duration, max_interactions) { (Some(duration), Some(max_interactions)) => RunConfig { max_interactions: Some(max_interactions), max_duration_ms: Some(spacetimedb_dst::config::parse_duration_spec(duration)?.as_millis() as u64), + harness_phase_timeout_ms, + commitlog_fault_profile, }, (Some(duration), None) => RunConfig::with_duration_spec(duration)?, (None, Some(max_interactions)) => RunConfig::with_max_interactions(max_interactions), (None, None) => RunConfig::with_max_interactions(1_000), - }) + }; + config.commitlog_fault_profile = commitlog_fault_profile; + config.harness_phase_timeout_ms = harness_phase_timeout_ms; + Ok(config) } #[allow(clippy::disallowed_macros)] -async fn run_target( - seed: u64, - scenario: D::Scenario, - config: RunConfig, -) -> anyhow::Result<()> { +async fn run_target(seed: u64, scenario: D::Scenario, config: RunConfig) -> anyhow::Result<()> { let line = D::run_streaming(seed, scenario, config).await?; println!("{line}"); Ok(()) diff --git a/crates/dst/src/properties.rs b/crates/dst/src/properties.rs index dbe227c2dd9..1cc36b00c19 100644 --- a/crates/dst/src/properties.rs +++ b/crates/dst/src/properties.rs @@ -31,8 +31,25 @@ pub(crate) use runtime::PropertyRuntime; pub(crate) trait TargetPropertyAccess { fn schema_plan(&self) -> &SchemaPlan; fn lookup_in_connection(&self, conn: SessionId, table: usize, id: u64) -> Result, String>; - fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String>; - fn collect_rows_for_table(&self, table: usize) -> Result, String>; + fn visit_rows_in_connection( + &self, 
+ conn: SessionId, + table: usize, + visitor: &mut dyn FnMut(SimRow) -> Result<(), String>, + ) -> Result<(), String>; + fn visit_rows_for_table( + &self, + table: usize, + visitor: &mut dyn FnMut(SimRow) -> Result<(), String>, + ) -> Result<(), String>; + fn collect_rows_for_table(&self, table: usize) -> Result, String> { + let mut rows = Vec::new(); + self.visit_rows_for_table(table, &mut |row| { + rows.push(row); + Ok(()) + })?; + Ok(rows) + } fn count_rows(&self, table: usize) -> Result; fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result; fn range_scan( @@ -73,6 +90,8 @@ pub(crate) enum PropertyKind { RangeScanMatchesModel, /// Model/oracle: full scans match the oracle session-visible model. FullScanMatchesModel, + /// Model/oracle: post-reopen table state matches the oracle model. + TablesVerifiedMatchesModel, } #[derive(Clone, Debug)] @@ -121,7 +140,9 @@ pub(crate) enum TableObservation { FullScan { conn: SessionId, table: usize, - actual: Vec, + }, + TablesVerified { + conn: SessionId, }, CommitOrRollback, } @@ -181,7 +202,9 @@ enum PropertyEvent<'a> { FullScan { conn: SessionId, table: usize, - actual: &'a [SimRow], + }, + TablesVerified { + conn: SessionId, }, CommitOrRollback, TableWorkloadFinished(&'a TableWorkloadOutcome), diff --git a/crates/dst/src/properties/rules.rs b/crates/dst/src/properties/rules.rs index 9d2552014c2..2a60b89ab1f 100644 --- a/crates/dst/src/properties/rules.rs +++ b/crates/dst/src/properties/rules.rs @@ -32,6 +32,7 @@ pub(super) fn rule_for_kind(kind: PropertyKind) -> Box { PropertyKind::PredicateCountMatchesModel => Box::::default(), PropertyKind::RangeScanMatchesModel => Box::::default(), PropertyKind::FullScanMatchesModel => Box::::default(), + PropertyKind::TablesVerifiedMatchesModel => Box::::default(), } } @@ -62,12 +63,8 @@ impl PropertyRule for OracleTableStateRule { fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { match event { 
PropertyEvent::TableWorkloadFinished(outcome) => { - let expected_rows = ctx.models.table().committed_rows(); - if outcome.final_rows != expected_rows { - return Err(format!( - "[OracleTableState] final table state mismatch: expected={expected_rows:?} actual={:?}", - outcome.final_rows - )); + for table in 0..ctx.models.table().table_count() { + assert_committed_rows_match_model(ctx, table, "[OracleTableState]")?; } self.scenario .validate_outcome(&self.schema, outcome) @@ -371,12 +368,51 @@ fn assert_visible_rows_match_model( property: &str, interaction: &crate::workload::table_ops::TableWorkloadInteraction, ) -> Result<(), String> { - let mut actual = ctx.access.collect_rows_in_connection(conn, table)?; - actual.sort_by_key(|row| row.id().unwrap_or_default()); let expected = ctx.models.table().visible_rows(conn, table); - if actual != expected { + assert_rows_match_expected( + &expected, + |visitor| ctx.access.visit_rows_in_connection(conn, table, visitor), + format!( + "{property} visible rows changed unexpectedly on conn={conn}, table={table}; interaction={interaction:?}" + ), + ) +} + +fn assert_committed_rows_match_model(ctx: &PropertyContext<'_>, table: usize, property: &str) -> Result<(), String> { + let expected = ctx.models.table().committed_rows_for_table(table); + assert_rows_match_expected( + &expected, + |visitor| ctx.access.visit_rows_for_table(table, visitor), + format!("{property} committed rows mismatch on table={table}"), + ) +} + +fn assert_rows_match_expected( + expected: &[SimRow], + visit_rows: impl FnOnce(&mut dyn FnMut(SimRow) -> Result<(), String>) -> Result<(), String>, + context: String, +) -> Result<(), String> { + let mut index = 0usize; + visit_rows(&mut |actual| { + let expected_row = expected.get(index).ok_or_else(|| { + format!( + "{context}: unexpected extra row at index {index}: actual={actual:?}, expected_len={}", + expected.len() + ) + })?; + if &actual != expected_row { + return Err(format!( + "{context}: row mismatch at index 
{index}: expected={expected_row:?}, actual={actual:?}" + )); + } + index += 1; + Ok(()) + })?; + + if let Some(expected_row) = expected.get(index) { return Err(format!( - "{property} visible rows changed unexpectedly on conn={conn}, table={table}: expected={expected:?}, actual={actual:?}; interaction={interaction:?}" + "{context}: missing row at index {index}: expected={expected_row:?}, actual_len={index}, expected_len={}", + expected.len() )); } Ok(()) @@ -462,14 +498,33 @@ struct FullScanMatchesModelRule; impl PropertyRule for FullScanMatchesModelRule { fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { - let PropertyEvent::FullScan { conn, table, actual } = event else { + let PropertyEvent::FullScan { conn, table } = event else { return Ok(()); }; let expected = ctx.models.table().full_scan(conn, table); - if actual != expected.as_slice() { - return Err(format!( - "[Model::FullScan] mismatch conn={conn}, table={table}: expected={expected:?}, actual={actual:?}" - )); + assert_rows_match_expected( + &expected, + |visitor| ctx.access.visit_rows_in_connection(conn, table, visitor), + format!("[Model::FullScan] mismatch conn={conn}, table={table}"), + ) + } +} + +#[derive(Default)] +struct TablesVerifiedMatchesModelRule; + +impl PropertyRule for TablesVerifiedMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::TablesVerified { conn } = event else { + return Ok(()); + }; + for table in 0..ctx.models.table().table_count() { + let expected = ctx.models.table().committed_rows_for_table(table); + assert_rows_match_expected( + &expected, + |visitor| ctx.access.visit_rows_for_table(table, visitor), + format!("[TablesVerifiedMatchesModel] table {table} state mismatch after reopen on conn={conn}"), + )?; } Ok(()) } diff --git a/crates/dst/src/properties/runtime.rs b/crates/dst/src/properties/runtime.rs index 52951b10b17..d4631c15449 100644 
--- a/crates/dst/src/properties/runtime.rs +++ b/crates/dst/src/properties/runtime.rs @@ -6,7 +6,9 @@ use crate::{ client::SessionId, core::{StreamingProperties, TargetEngine}, schema::{SchemaPlan, SimRow}, - workload::table_ops::{PredictedOutcome, TableErrorKind, TableOracle, TableWorkloadInteraction, TableWorkloadOutcome}, + workload::table_ops::{ + PredictedOutcome, TableErrorKind, TableOracle, TableWorkloadInteraction, TableWorkloadOutcome, + }, }; use super::{ @@ -47,8 +49,12 @@ impl PropertyModels { } impl TableModel { - pub(super) fn committed_rows(&self) -> Vec> { - self.oracle.clone().committed_rows() + pub(super) fn table_count(&self) -> usize { + self.oracle.table_count() + } + + pub(super) fn committed_rows_for_table(&self, table: usize) -> Vec { + self.oracle.committed_rows_for_table(table) } pub(super) fn lookup_by_id(&self, conn: SessionId, table: usize, id: u64) -> Option { @@ -268,14 +274,12 @@ impl PropertyRuntime { ) } - fn on_full_scan( - &mut self, - access: &dyn TargetPropertyAccess, - conn: SessionId, - table: usize, - actual: &[SimRow], - ) -> Result<(), String> { - self.observe_event(access, PropertyEvent::FullScan { conn, table, actual }) + fn on_full_scan(&mut self, access: &dyn TargetPropertyAccess, conn: SessionId, table: usize) -> Result<(), String> { + self.observe_event(access, PropertyEvent::FullScan { conn, table }) + } + + fn on_tables_verified(&mut self, access: &dyn TargetPropertyAccess, conn: SessionId) -> Result<(), String> { + self.observe_event(access, PropertyEvent::TablesVerified { conn }) } fn on_commit_or_rollback(&mut self, access: &dyn TargetPropertyAccess) -> Result<(), String> { @@ -350,7 +354,8 @@ impl PropertyRuntime { upper, actual, } => self.on_range_scan(access, *conn, *table, cols, lower, upper, actual)?, - TableObservation::FullScan { conn, table, actual } => self.on_full_scan(access, *conn, *table, actual)?, + TableObservation::FullScan { conn, table } => self.on_full_scan(access, *conn, *table)?, + 
TableObservation::TablesVerified { conn } => self.on_tables_verified(access, *conn)?, TableObservation::CommitOrRollback => {} } @@ -409,6 +414,7 @@ impl Default for PropertyRuntime { PropertyKind::PredicateCountMatchesModel, PropertyKind::RangeScanMatchesModel, PropertyKind::FullScanMatchesModel, + PropertyKind::TablesVerifiedMatchesModel, ]) } } @@ -422,6 +428,7 @@ fn observed_error_kind(observation: &TableObservation) -> Option | TableObservation::PredicateCount { .. } | TableObservation::RangeScan { .. } | TableObservation::FullScan { .. } + | TableObservation::TablesVerified { .. } | TableObservation::CommitOrRollback => None, } } diff --git a/crates/dst/src/schema.rs b/crates/dst/src/schema.rs index fdaaa627954..e286add32a2 100644 --- a/crates/dst/src/schema.rs +++ b/crates/dst/src/schema.rs @@ -23,6 +23,9 @@ pub struct TablePlan { /// A value like `[1]` means a single-column secondary index on column 1. /// A value like `[0, 1]` means a composite btree index over columns 0 and 1. pub extra_indexes: Vec>, + /// If true, rows are visible only within the inserting transaction + /// and are not persisted to committed state after commit. + pub is_event: bool, } /// Column definition used by simulators. 
diff --git a/crates/dst/src/sim/commitlog.rs b/crates/dst/src/sim/commitlog.rs index 7fdd83618fc..73beefe884b 100644 --- a/crates/dst/src/sim/commitlog.rs +++ b/crates/dst/src/sim/commitlog.rs @@ -7,20 +7,25 @@ use std::{ use spacetimedb_commitlog::{ repo::{ - CompressOnce, CompressionStats, Repo, RepoWithoutLockFile, SegmentLen, SegmentReader, TxOffset, TxOffsetIndex, TxOffsetIndexMut, + CompressOnce, CompressionStats, Repo, RepoWithoutLockFile, SegmentLen, SegmentReader, TxOffset, TxOffsetIndex, + TxOffsetIndexMut, }, segment::{FileLike, Header}, }; -use crate::sim::storage_faults::{ - is_injected_fault_text, ShortIoKind, StorageFaultConfig, StorageFaultController, StorageFaultDomain, - StorageFaultKind, StorageFaultSummary, +use crate::{ + config::StorageFaultSummary, + sim::storage_faults::{ + is_injected_fault_text, ShortIoKind, StorageFaultConfig, StorageFaultController, StorageFaultDomain, + StorageFaultKind, + }, }; pub(crate) type CommitlogFaultConfig = StorageFaultConfig; pub(crate) type CommitlogFaultSummary = StorageFaultSummary; /// Returns true if `text` contains an error created by this fault layer. 
+#[allow(dead_code)] pub(crate) fn is_injected_disk_error_text(text: &str) -> bool { is_injected_fault_text(StorageFaultDomain::Disk, text) } @@ -45,6 +50,7 @@ impl FaultableRepo { } } + #[allow(dead_code)] pub(crate) fn enable_faults(&self) { self.faults.enable(); } @@ -53,6 +59,7 @@ impl FaultableRepo { self.faults.summary() } + #[allow(dead_code)] pub(crate) fn with_faults_suspended(&self, f: impl FnOnce() -> T) -> T { self.faults.with_suspended(f) } diff --git a/crates/dst/src/sim/mod.rs b/crates/dst/src/sim/mod.rs index 51cea430fc6..14c1227f1c4 100644 --- a/crates/dst/src/sim/mod.rs +++ b/crates/dst/src/sim/mod.rs @@ -7,6 +7,7 @@ pub(crate) mod commitlog; pub(crate) mod snapshot; pub(crate) mod storage_faults; +pub mod time; use std::{cell::RefCell, future::Future, time::Duration}; diff --git a/crates/dst/src/sim/snapshot.rs b/crates/dst/src/sim/snapshot.rs index 13c0e3a43c3..698d2d22f38 100644 --- a/crates/dst/src/sim/snapshot.rs +++ b/crates/dst/src/sim/snapshot.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + //! In-memory snapshot storage with deterministic fault injection. //! //! This is intentionally a semantic snapshot seam, not a filesystem facade. It @@ -14,9 +16,11 @@ use spacetimedb_snapshot::{ }; use spacetimedb_table::{blob_store::BlobStore, page_pool::PagePool, table::Table}; -use crate::sim::storage_faults::{ - is_injected_fault_text, StorageFaultConfig, StorageFaultController, StorageFaultDomain, StorageFaultKind, - StorageFaultSummary, +use crate::{ + config::StorageFaultSummary, + sim::storage_faults::{ + is_injected_fault_text, StorageFaultConfig, StorageFaultController, StorageFaultDomain, StorageFaultKind, + }, }; pub(crate) type SnapshotFaultConfig = StorageFaultConfig; diff --git a/crates/dst/src/sim/storage_faults.rs b/crates/dst/src/sim/storage_faults.rs index a1c59e5ca71..a3627cbe9b5 100644 --- a/crates/dst/src/sim/storage_faults.rs +++ b/crates/dst/src/sim/storage_faults.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + //! 
Shared storage fault-injection primitives for DST simulation helpers. //! //! Fault decisions use [`spacetimedb_runtime::sim::Handle::buggify_with_prob`] @@ -12,7 +14,7 @@ use std::{ time::Duration, }; -use crate::config::CommitlogFaultProfile; +use crate::config::{CommitlogFaultProfile, StorageFaultSummary}; const INJECTED_ERROR_PREFIX: &str = "dst injected "; @@ -108,22 +110,6 @@ impl StorageFaultConfig { } } -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub(crate) struct StorageFaultSummary { - pub(crate) profile: CommitlogFaultProfile, - pub(crate) latency: usize, - pub(crate) short_read: usize, - pub(crate) short_write: usize, - pub(crate) read_error: usize, - pub(crate) write_error: usize, - pub(crate) flush_error: usize, - pub(crate) fsync_error: usize, - pub(crate) open_error: usize, - pub(crate) metadata_error: usize, - pub(crate) no_space: usize, - pub(crate) partial_failure: usize, -} - #[derive(Clone, Copy, Debug)] pub(crate) enum StorageFaultDomain { Disk, @@ -263,7 +249,7 @@ impl StorageFaultController { } fn sample_latency(&self, probability: f64) -> bool { - if probability <= 0.0 { + if probability <= 0.0 || !self.active() { return false; } match &self.handle { diff --git a/crates/dst/src/sim/time.rs b/crates/dst/src/sim/time.rs index bdeae0fbb58..1db4470c9ba 100644 --- a/crates/dst/src/sim/time.rs +++ b/crates/dst/src/sim/time.rs @@ -21,7 +21,10 @@ pub async fn sleep(duration: Duration) { current_handle().sleep(duration).await } -pub async fn timeout(duration: Duration, future: impl core::future::Future) -> Result { +pub async fn timeout( + duration: Duration, + future: impl core::future::Future, +) -> Result { current_handle().timeout(duration, future).await } @@ -76,7 +79,7 @@ mod tests { }); assert_eq!(*order.lock().expect("order poisoned"), vec![3, 10]); - assert_eq!(runtime.elapsed(), Duration::from_millis(10)); + assert_elapsed_near(runtime.elapsed(), Duration::from_millis(10)); } #[test] @@ -102,7 +105,7 @@ mod tests { }); 
assert_eq!(output, Ok(9)); - assert_eq!(runtime.elapsed(), Duration::from_millis(3)); + assert_elapsed_near(runtime.elapsed(), Duration::from_millis(3)); } #[test] @@ -118,6 +121,14 @@ mod tests { }); assert_eq!(output.unwrap_err().duration(), Duration::from_millis(4)); - assert_eq!(runtime.elapsed(), Duration::from_millis(4)); + assert_elapsed_near(runtime.elapsed(), Duration::from_millis(4)); + } + + fn assert_elapsed_near(actual: Duration, expected: Duration) { + assert!(actual >= expected, "actual={actual:?} expected={expected:?}"); + assert!( + actual < expected + Duration::from_millis(1), + "actual={actual:?} expected={expected:?}" + ); } } diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs index 1a00c77a937..3212be338a0 100644 --- a/crates/dst/src/targets/descriptor.rs +++ b/crates/dst/src/targets/descriptor.rs @@ -26,14 +26,32 @@ impl TargetDescriptor for RelationalDbCommitlogDescriptor { fn run_streaming(seed: u64, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture { Box::pin(async move { + let scenario_name = scenario.as_str(); + let max_interactions = config.max_interactions; + let duration_ms = config.max_duration_ms; + let profile = config.commitlog_fault_profile; + let harness_phase_timeout_ms = config.harness_phase_timeout_ms; let outcome = crate::targets::relational_db_commitlog::run_generated_with_config_and_scenario(seed, scenario, config) .await?; Ok(format!( - "ok target={} seed={} steps={}", + "ok target={} scenario={} seed={} max_interactions={} duration_ms={} harness_phase_timeout_ms={} commitlog_fault_profile={} interactions={} final_row_count={} commitlog_faults={:?}", Self::NAME, + scenario_name, seed, + max_interactions + .map(|value| value.to_string()) + .unwrap_or_else(|| "none".to_string()), + duration_ms + .map(|value| value.to_string()) + .unwrap_or_else(|| "none".to_string()), + harness_phase_timeout_ms + .map(|value| value.to_string()) + .unwrap_or_else(|| "off".to_string()), + 
profile, + outcome.interactions_executed, outcome.final_row_counts.iter().sum::(), + outcome.commitlog_fault_summary, )) }) } diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs index 5a116a6e3aa..973248408e9 100644 --- a/crates/dst/src/targets/relational_db_commitlog.rs +++ b/crates/dst/src/targets/relational_db_commitlog.rs @@ -17,7 +17,8 @@ use spacetimedb_lib::{ db::auth::{StAccess, StTableType}, Identity, }; -use spacetimedb_primitives::TableId; + +use spacetimedb_primitives::{ColList, TableId}; use spacetimedb_runtime::Handle as RuntimeHandle; use spacetimedb_sats::AlgebraicValue; use spacetimedb_schema::{ @@ -25,28 +26,25 @@ use spacetimedb_schema::{ schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, table_name::TableName, }; -use spacetimedb_snapshot::SnapshotStore; use spacetimedb_table::page_pool::PagePool; use tracing::{info, trace}; use crate::{ client::SessionId, - config::{CommitlogFaultProfile, RunConfig}, + config::RunConfig, core::{self, TargetEngine}, - properties::{ - PropertyRuntime, TableMutation, TableObservation, TargetPropertyAccess, - }, - schema::{SchemaPlan, SimRow}, + properties::{PropertyRuntime, TableMutation, TableObservation, TargetPropertyAccess}, + schema::{SchemaPlan, SimRow, TablePlan}, sim::{ commitlog::{CommitlogFaultConfig, FaultableRepo}, - fork_seed, - snapshot::BuggifiedSnapshotRepo, - storage_faults::StorageFaultConfig, - Rng, + fork_seed, Rng, }, - workload::table_ops::{ - ConnectionWriteState, TableErrorKind, TableOperation, TableScenario, TableScenarioId, TableWorkloadInteraction, - TableWorkloadOutcome, TableWorkloadSource, + workload::{ + commitlog_ops::CommitlogWorkloadSource, + table_ops::{ + ConnectionWriteState, TableErrorKind, TableOperation, TableScenario, TableScenarioId, + TableWorkloadInteraction, TableWorkloadOutcome, + }, }, }; @@ -63,51 +61,62 @@ pub async fn run_generated_with_config_and_scenario( }; let schema_rng = 
Rng::new(fork_seed(seed, 122)); let schema = scenario.generate_schema(&schema_rng); - let source = TableWorkloadSource::new( - seed, - scenario, - schema.clone(), - num_connections, - config.max_interactions_or_default(usize::MAX), - ); + // Use the lifecycle-wrapped source so every run exercises commitlog + // close/reopen/replay. The lifecycle layer periodically injects + // Reopen + VerifyTables operations into the interaction stream. + let source = CommitlogWorkloadSource::new(seed, scenario, schema.clone(), num_connections, usize::MAX); let sim_handle = crate::sim::current_handle().expect("must run inside sim Runtime::block_on"); let rt_handle = RuntimeHandle::simulation(sim_handle.clone()); // Build faulty commitlog + persistence + let page_pool = PagePool::new_for_test(); + let commitlog_fault_profile = config.commitlog_fault_profile; let clog_repo = FaultableRepo::new( Memory::unlimited(), - CommitlogFaultConfig::for_profile(CommitlogFaultProfile::Default), + CommitlogFaultConfig::for_profile(commitlog_fault_profile), ); + // Clone the repo so we can reopen from it later. + let clog_repo_for_reopen = clog_repo.clone(); let local = DurabilityLocal::open_with_repo(clog_repo, rt_handle.clone(), DurabilityOpts::default())?; let history = local.as_history(); let durability = Arc::new(local); - // Build faulty snapshot store - let snap_repo = Arc::new(BuggifiedSnapshotRepo::new( - StorageFaultConfig::for_profile(CommitlogFaultProfile::Default), - )?) 
as Arc; - // Enable buggify after setup so initial replay is fault-free sim_handle.enable_buggify(); let persistence = Persistence { durability, disk_size: { - use std::io; use spacetimedb_commitlog::repo::SizeOnDisk; - Arc::new(|| io::Result::Ok(SizeOnDisk { total_bytes: 0, total_blocks: 0 })) as DiskSizeFn + use std::io; + Arc::new(|| { + io::Result::Ok(SizeOnDisk { + total_bytes: 0, + total_blocks: 0, + }) + }) as DiskSizeFn }, - snapshot_store: Some(snap_repo), snapshots: None, - runtime: rt_handle, + runtime: rt_handle.clone(), }; - let engine = RelationalDbEngine::new(seed, &schema, num_connections, history, Some(persistence))?; + let engine = RelationalDbEngine::new( + seed, + &schema, + num_connections, + history, + Some(persistence), + clog_repo_for_reopen, + rt_handle, + page_pool, + commitlog_fault_profile, + )?; let properties = PropertyRuntime::for_table_workload(scenario, schema.clone(), num_connections); let outcome = core::run_streaming(source, engine, properties, config).await?; info!( - applied_steps = outcome.final_row_counts.iter().sum::(), + interactions_executed = outcome.interactions_executed, + final_row_count = outcome.final_row_counts.iter().sum::(), "relational_db_table complete" ); Ok(outcome) @@ -120,25 +129,50 @@ struct RelationalDbEngine { base_schema: SchemaPlan, base_table_ids: Vec, step: usize, + /// Clone of the commitlog repo, kept for close/reopen cycles. + clog_repo: FaultableRepo, + /// Runtime handle for creating new durability instances. + rt_handle: RuntimeHandle, + /// Page pool for creating new RelationalDB instances. + page_pool: PagePool, + /// Database identity, preserved across reopen. + db_identity: Identity, + /// Owner identity, preserved across reopen. 
+ owner_identity: Identity, + commitlog_fault_profile: crate::config::CommitlogFaultProfile, } impl RelationalDbEngine { + #[allow(clippy::too_many_arguments)] fn new>( - _seed: u64, schema: &SchemaPlan, num_connections: usize, - history: H, persistence: Option, + _seed: u64, + schema: &SchemaPlan, + num_connections: usize, + history: H, + persistence: Option, + clog_repo: FaultableRepo, + rt_handle: RuntimeHandle, + page_pool: PagePool, + commitlog_fault_profile: crate::config::CommitlogFaultProfile, ) -> anyhow::Result { + info!("DST: before RelationalDB::open"); + let db_identity = Identity::ZERO; + let owner_identity = Identity::ZERO; let (db, connected_clients) = RelationalDB::open( - Identity::ZERO, - Identity::ZERO, + db_identity, + owner_identity, history, persistence, None, - PagePool::new_for_test(), + page_pool.clone(), )?; + info!("DST: after RelationalDB::open"); assert_eq!(connected_clients.len(), 0); + info!("DST: before set_initialized"); db.with_auto_commit(Workload::Internal, |tx| { db.set_initialized(tx, spacetimedb_datastore::traits::Program::empty(HostType::Wasm.into())) })?; + info!("DST: after set_initialized"); let mut engine = Self { db: Some(db), @@ -147,13 +181,23 @@ impl RelationalDbEngine { base_schema: schema.clone(), base_table_ids: Vec::with_capacity(schema.tables.len()), step: 0, + clog_repo, + rt_handle, + page_pool, + db_identity, + owner_identity, + commitlog_fault_profile, }; + info!("DST: before install_base_schema"); engine.install_base_schema().map_err(anyhow::Error::msg)?; + info!("DST: after install_base_schema"); Ok(engine) } fn db(&self) -> Result<&RelationalDB, String> { - self.db.as_ref().ok_or_else(|| "relational db not initialized".to_string()) + self.db + .as_ref() + .ok_or_else(|| "relational db not initialized".to_string()) } fn install_base_schema(&mut self) -> Result<(), String> { @@ -284,13 +328,14 @@ impl RelationalDbEngine { .map(|(idx, existing)| ColumnSchema::for_test(idx as u16, &existing.name, 
existing.ty.clone())) .collect::>(); columns.push(ColumnSchema::for_test(column_idx, &column.name, column.ty.clone())); - self.with_mut_tx(*conn, |engine, tx| { - let new_table_id = engine + let new_table_id = self.with_mut_tx(*conn, |engine, tx| { + engine .db()? .add_columns_to_table(tx, table_id, columns.clone(), vec![default.clone()]) - .map_err(|err| format!("add column failed: {err}"))?; - Ok(new_table_id) + .map_err(|err| format!("add column failed: {err}")) })?; + self.base_schema.tables[*table].columns.push(column.clone()); + self.base_table_ids[*table] = new_table_id; Ok(TableObservation::Applied) } TableOperation::AddIndex { conn, table, cols } => { @@ -357,14 +402,17 @@ impl RelationalDbEngine { actual, }) } - TableOperation::FullScan { conn, table } => { - let actual = self.collect_rows_in_connection(*conn, *table)?; - Ok(TableObservation::FullScan { - conn: *conn, - table: *table, - actual, - }) + TableOperation::FullScan { conn, table } => Ok(TableObservation::FullScan { + conn: *conn, + table: *table, + }), + TableOperation::AddTable { conn, schema } => self.execute_add_table(*conn, schema), + TableOperation::DropTable { conn, table } => self.execute_drop_table(*conn, *table), + TableOperation::TruncateTable { conn, table } => self.execute_truncate_table(*conn, *table), + TableOperation::Reopen { .. 
} => { + Err("Reopen must be handled via execute_interaction async path".to_string()) } + TableOperation::VerifyTables { conn } => self.execute_verify_tables(*conn), } } @@ -393,9 +441,7 @@ impl RelationalDbEngine { } None => { if self.execution.active_writer.is_some() || self.any_open_read_tx() { - Ok(TableObservation::ObservedError( - TableErrorKind::WriteConflict, - )) + Ok(TableObservation::ObservedError(TableErrorKind::WriteConflict)) } else { Err(format!( "connection {conn} failed to begin write transaction without an open conflicting lock" @@ -612,15 +658,21 @@ impl RelationalDbEngine { result } - fn collect_rows_by_id(&self, table_id: TableId) -> Result, String> { + fn visit_rows_by_id( + &self, + table_id: TableId, + visitor: &mut dyn FnMut(SimRow) -> Result<(), String>, + ) -> Result<(), String> { self.with_fresh_read_tx(|db, tx| { - let mut rows = db - .iter(tx, table_id) + let cols = [0u16].into_iter().collect::(); + let bounds = (Bound::::Unbounded, Bound::::Unbounded); + for row_ref in db + .iter_by_col_range(tx, table_id, cols, bounds) .map_err(|err| format!("scan failed: {err}"))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); - Ok(rows) + { + visitor(SimRow::from_product_value(row_ref.to_product_value()))?; + } + Ok(()) }) } @@ -651,28 +703,35 @@ impl RelationalDbEngine { } } - fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String> { + fn visit_rows_in_connection( + &self, + conn: SessionId, + table: usize, + visitor: &mut dyn FnMut(SimRow) -> Result<(), String>, + ) -> Result<(), String> { let table_id = self.table_id_for_index(table)?; + let cols = [0u16].into_iter().collect::(); + let bounds = (Bound::::Unbounded, Bound::::Unbounded); if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { - let mut rows = self + for row_ref in self .db()? 
- .iter_mut(tx, table_id) + .iter_by_col_range_mut(tx, table_id, cols, bounds) .map_err(|err| format!("in-tx scan failed: {err}"))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); - Ok(rows) + { + visitor(SimRow::from_product_value(row_ref.to_product_value()))?; + } + Ok(()) } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { - let mut rows = self + for row_ref in self .db()? - .iter(tx, table_id) + .iter_by_col_range(tx, table_id, cols, bounds) .map_err(|err| format!("read-tx scan failed: {err}"))? - .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) - .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); - Ok(rows) + { + visitor(SimRow::from_product_value(row_ref.to_product_value()))?; + } + Ok(()) } else { - self.collect_rows_by_id(table_id) + self.visit_rows_by_id(table_id, visitor) } } @@ -717,37 +776,186 @@ impl RelationalDbEngine { let table_id = self.table_id_for_index(table)?; let cols_list = cols.iter().copied().collect::(); if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { - let mut rows = self + let rows = self .db()? .iter_by_col_range_mut(tx, table_id, cols_list, (lower, upper)) .map_err(|err| format!("in-tx range scan failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); Ok(rows) } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { - let mut rows = self + let rows = self .db()? .iter_by_col_range(tx, table_id, cols_list, (lower, upper)) .map_err(|err| format!("read-tx range scan failed: {err}"))? 
.map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); Ok(rows) } else { self.with_fresh_read_tx(|db, tx| { - let mut rows = db + let rows = db .iter_by_col_range(tx, table_id, cols_list, (lower, upper)) .map_err(|err| format!("range scan failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); Ok(rows) }) } } + + fn execute_add_table(&mut self, conn: SessionId, schema: &TablePlan) -> Result { + let columns = schema + .columns + .iter() + .enumerate() + .map(|(idx, col)| ColumnSchema::for_test(idx as u16, &col.name, col.ty.clone())) + .collect::>(); + let mut indexes = vec![IndexSchema::for_test( + format!("{}_id_idx", schema.name), + BTreeAlgorithm::from(0), + )]; + for cols in &schema.extra_indexes { + let cols_name = cols.iter().map(|c| format!("c{c}")).collect::>().join("_"); + indexes.push(IndexSchema::for_test( + format!("{}_{}_idx", schema.name, cols_name), + BTreeAlgorithm::from(cols.iter().copied().collect::()), + )); + } + let constraints = vec![ConstraintSchema::unique_for_test( + format!("{}_id_unique", schema.name), + 0, + )]; + let table_schema = TableSchema::new( + TableId::SENTINEL, + TableName::for_test(&schema.name), + None, + columns, + indexes, + constraints, + vec![], + StTableType::User, + StAccess::Public, + None, + Some(0.into()), + schema.is_event, + None, + ); + self.with_mut_tx(conn, |engine, tx| { + let table_id = engine + .db()? 
+ .create_table(tx, table_schema.clone()) + .map_err(|err| format!("add table failed: {err}"))?; + engine.base_table_ids.push(table_id); + engine.base_schema.tables.push(schema.clone()); + Ok(()) + })?; + Ok(TableObservation::Applied) + } + + fn execute_truncate_table(&mut self, conn: SessionId, table: usize) -> Result { + let table_id = self.table_id_for_index(table)?; + self.with_mut_tx(conn, |engine, tx| { + engine + .db()? + .clear_table(tx, table_id) + .map_err(|err| format!("truncate table failed: {err}"))?; + Ok(()) + })?; + Ok(TableObservation::Applied) + } + + fn execute_drop_table(&mut self, conn: SessionId, table: usize) -> Result { + // Clear all rows from the table, like truncate, but exercise the DDL path. + // Future work: actually drop the table schema from the catalog. + let table_id = self.table_id_for_index(table)?; + self.with_mut_tx(conn, |engine, tx| { + engine + .db()? + .clear_table(tx, table_id) + .map_err(|err| format!("drop table (clear) failed: {err}"))?; + Ok(()) + })?; + Ok(TableObservation::Applied) + } + + fn execute_verify_tables(&mut self, conn: SessionId) -> Result { + Ok(TableObservation::TablesVerified { conn }) + } + + /// Close the current RelationalDB and reopen from the commitlog. + /// + /// This exercises the full replay path: durability close, commitlog reopen, + /// `apply_history`, and `ReplayCommittedState`. After reopening, the DB + /// should have exactly the same committed state as before close. + async fn execute_reopen(&mut self) -> Result { + // 1. Take ownership of the current DB and shut down its durability. + let db = self + .db + .take() + .ok_or_else(|| "db not initialized for reopen".to_string())?; + let _ = db.shutdown().await; + drop(db); + + // 2. Create a new Local durability from the stored repo clone. 
+ let new_local = DurabilityLocal::open_with_repo( + self.clog_repo.clone(), + self.rt_handle.clone(), + DurabilityOpts::default(), + ) + .map_err(|e| format!("reopen: durability open failed: {e}"))?; + let new_history = new_local.as_history(); + let new_durability: Arc>> = + Arc::new(new_local); + + // 3. Build persistence for the new DB. + let persistence = Persistence { + durability: new_durability.clone(), + disk_size: { + use spacetimedb_commitlog::repo::SizeOnDisk; + use std::io; + Arc::new(|| { + io::Result::Ok(SizeOnDisk { + total_bytes: 0, + total_blocks: 0, + }) + }) as DiskSizeFn + }, + snapshots: None, + runtime: self.rt_handle.clone(), + }; + + // 4. Open a new RelationalDB from the same commitlog history. + let (new_db, connected_clients) = RelationalDB::open( + self.db_identity, + self.owner_identity, + new_history, + Some(persistence), + None, + self.page_pool.clone(), + ) + .map_err(|e| format!("reopen: RelationalDB::open failed: {e}"))?; + if !connected_clients.is_empty() { + return Err(format!( + "reopen: got {} connected clients, expected 0", + connected_clients.len() + )); + } + + self.db = Some(new_db); + // Reset execution state: no open transactions after reopen. + self.execution.active_writer = None; + for slot in &mut self.execution.tx_by_connection { + *slot = None; + } + for slot in &mut self.read_tx_by_connection { + *slot = None; + } + + Ok(TableObservation::Applied) + } } +#[allow(clippy::manual_async_fn)] impl TargetEngine for RelationalDbEngine { type Observation = TableObservation; type Outcome = TableWorkloadOutcome; @@ -757,24 +965,49 @@ impl TargetEngine for RelationalDbEngine { &'a mut self, interaction: &'a TableWorkloadInteraction, ) -> impl std::future::Future> + 'a { - async move { self.execute(interaction) } + async move { + if matches!(interaction.op, TableOperation::Reopen { .. 
}) { + return self.execute_reopen().await; + } + self.execute(interaction) + } } - fn finish(&mut self) {} + fn finish(&mut self) { + let Some(db) = self.db.as_ref() else { + return; + }; + for tx in &mut self.execution.tx_by_connection { + if let Some(tx) = tx.take() { + let _ = db.rollback_mut_tx(tx); + } + } + self.execution.active_writer = None; + for tx in &mut self.read_tx_by_connection { + if let Some(tx) = tx.take() { + let _ = db.release_tx(tx); + } + } + } fn collect_outcome<'a>(&'a mut self) -> impl std::future::Future> + 'a { async move { - let mut final_rows = Vec::with_capacity(self.base_schema.tables.len()); let mut final_row_counts = Vec::with_capacity(self.base_schema.tables.len()); for table in 0..self.base_schema.tables.len() { let table_id = self.table_id_for_index(table).map_err(anyhow::Error::msg)?; - let rows = self.collect_rows_by_id(table_id).map_err(anyhow::Error::msg)?; - final_row_counts.push(rows.len() as u64); - final_rows.push(rows); + let mut row_count = 0u64; + self.visit_rows_by_id(table_id, &mut |_| { + row_count += 1; + Ok(()) + }) + .map_err(anyhow::Error::msg)?; + final_row_counts.push(row_count); } Ok(TableWorkloadOutcome { + interactions_executed: 0, + commitlog_fault_profile: self.commitlog_fault_profile, + commitlog_fault_summary: self.clog_repo.fault_summary(), final_row_counts, - final_rows, }) } } @@ -789,13 +1022,22 @@ impl TargetPropertyAccess for RelationalDbEngine { self.lookup_base_row(conn, table, id) } - fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String> { - self.collect_rows_in_connection(conn, table) + fn visit_rows_in_connection( + &self, + conn: SessionId, + table: usize, + visitor: &mut dyn FnMut(SimRow) -> Result<(), String>, + ) -> Result<(), String> { + RelationalDbEngine::visit_rows_in_connection(self, conn, table, visitor) } - fn collect_rows_for_table(&self, table: usize) -> Result, String> { + fn visit_rows_for_table( + &self, + table: usize, + visitor: &mut dyn 
FnMut(SimRow) -> Result<(), String>, + ) -> Result<(), String> { let table_id = self.table_id_for_index(table)?; - self.collect_rows_by_id(table_id) + self.visit_rows_by_id(table_id, visitor) } fn count_rows(&self, table: usize) -> Result { @@ -828,12 +1070,11 @@ impl TargetPropertyAccess for RelationalDbEngine { let table_id = self.table_id_for_index(table)?; let cols_list = cols.iter().copied().collect::(); self.with_fresh_read_tx(|db, tx| { - let mut rows = db + let rows = db .iter_by_col_range(tx, table_id, cols_list, (lower, upper)) .map_err(|err| format!("range scan failed: {err}"))? .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) .collect::>(); - rows.sort_by_key(|row| row.id().unwrap_or_default()); Ok(rows) }) } diff --git a/crates/dst/src/workload/mod.rs b/crates/dst/src/workload/mod.rs index faf3c04b5f2..b36de904413 100644 --- a/crates/dst/src/workload/mod.rs +++ b/crates/dst/src/workload/mod.rs @@ -1,4 +1,5 @@ //! Shared workload generators reused by multiple DST targets. 
-pub mod table_ops; +pub(crate) mod commitlog_ops; pub(crate) mod strategy; +pub mod table_ops; diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs index b6050fd8e18..c20948e9c1a 100644 --- a/crates/dst/src/workload/table_ops/generation.rs +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -165,6 +165,19 @@ impl<'a> ScenarioPlanner<'a> { self.model.add_index(table, cols); } + pub fn add_table(&mut self, schema: &TablePlan, is_event: bool) -> usize { + self.model.add_table(schema, is_event) + } + + pub fn truncate(&mut self, conn: SessionId, table: usize) { + self.model.truncate(conn, table); + } + + #[allow(dead_code)] + pub fn drop_table(&mut self, conn: SessionId, table: usize) { + self.model.drop_table(conn, table); + } + pub fn absent_row(&mut self, conn: SessionId, table: usize) -> crate::schema::SimRow { self.model.absent_row(self.rng, conn, table) } @@ -183,13 +196,7 @@ impl<'a> ScenarioPlanner<'a> { } impl TableWorkloadSource { - pub fn new( - seed: u64, - scenario: S, - schema: SchemaPlan, - num_connections: usize, - target_interactions: usize, - ) -> Self { + pub fn new(seed: u64, scenario: S, schema: SchemaPlan, num_connections: usize, target_interactions: usize) -> Self { Self { rng: Rng::new(fork_seed(seed, 17)), scenario, diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs index f56b1db5a25..28503cc5338 100644 --- a/crates/dst/src/workload/table_ops/model.rs +++ b/crates/dst/src/workload/table_ops/model.rs @@ -4,7 +4,7 @@ use spacetimedb_sats::AlgebraicValue; use crate::{ client::SessionId, - schema::{distinct_value_for_type, generate_value_for_type, ColumnPlan, SchemaPlan, SimRow}, + schema::{distinct_value_for_type, generate_value_for_type, ColumnPlan, SchemaPlan, SimRow, TablePlan}, sim::{fork_seed, Rng}, }; @@ -22,6 +22,8 @@ pub(crate) struct GenerationModel { committed: Vec>, next_ids: Vec, active_writer: Option, + is_event: Vec, + 
table_counter: usize, } #[derive(Clone, Debug, Default)] @@ -42,6 +44,8 @@ impl GenerationModel { .map(|idx| fork_seed(seed, idx as u64 + 100)) .collect(), active_writer: None, + is_event: schema.tables.iter().map(|t| t.is_event).collect(), + table_counter: schema.tables.len(), } } @@ -184,7 +188,9 @@ impl GenerationModel { self.committed[*table].retain(|candidate| candidate != row); } for (table, row) in &inserts { - self.committed[*table].push(row.clone()); + if !self.is_event[*table] { + self.committed[*table].push(row.clone()); + } } } @@ -225,6 +231,36 @@ impl GenerationModel { indexes.push(cols); } } + + pub(crate) fn add_table(&mut self, schema: &TablePlan, is_event: bool) -> usize { + let table_idx = self.table_counter; + self.table_counter += 1; + self.schema.tables.push(schema.clone()); + self.committed.push(Vec::new()); + self.next_ids.push(1); + self.is_event.push(is_event); + for conn in &mut self.connections { + if let Some(snapshot) = &mut conn.read_snapshot { + snapshot.push(Vec::new()); + } + } + table_idx + } + + pub(crate) fn truncate(&mut self, conn: SessionId, table: usize) { + self.committed[table].clear(); + let pending = &mut self.connections[conn.as_index()]; + pending.staged_inserts.retain(|(t, _)| *t != table); + pending.staged_deletes.retain(|(t, _)| *t != table); + if let Some(snapshot) = &mut pending.read_snapshot { + snapshot[table].clear(); + } + } + + #[allow(dead_code)] + pub(crate) fn drop_table(&mut self, conn: SessionId, table: usize) { + self.truncate(conn, table); + } } /// Replay model used as the oracle for table workload properties. 
@@ -237,6 +273,7 @@ pub struct TableOracle { committed: Vec>, connections: Vec, active_writer: Option, + is_event: Vec, } #[derive(Clone, Debug, Eq, PartialEq)] @@ -265,6 +302,7 @@ impl TableOracle { committed: vec![Vec::new(); table_count], connections: vec![ExpectedConnection::default(); connection_count], active_writer: None, + is_event: vec![false; table_count], } } @@ -316,6 +354,19 @@ impl TableOracle { TableOperation::InsertRows { conn, table, rows } => self.predict_insert_rows(*conn, *table, rows), TableOperation::DeleteRows { conn, table, rows } => self.predict_delete_rows(*conn, *table, rows), TableOperation::AddColumn { .. } | TableOperation::AddIndex { .. } => Ok(PredictedOutcome::Applied), + TableOperation::AddTable { .. } => Ok(PredictedOutcome::Applied), + TableOperation::DropTable { conn, table } => { + self.ensure_connection(*conn)?; + self.ensure_table(*table)?; + Ok(PredictedOutcome::Applied) + } + TableOperation::TruncateTable { conn, table } => { + self.ensure_connection(*conn)?; + self.ensure_table(*table)?; + Ok(PredictedOutcome::Applied) + } + TableOperation::Reopen { .. } => Ok(PredictedOutcome::Applied), + TableOperation::VerifyTables { .. } => Ok(PredictedOutcome::Applied), TableOperation::PointLookup { .. } | TableOperation::PredicateCount { .. } | TableOperation::RangeScan { .. } @@ -352,7 +403,9 @@ impl TableOracle { self.committed[table].retain(|candidate| *candidate != row); } for (table, row) in state.staged_inserts.drain(..) { - self.committed[table].push(row); + if !self.is_event[table] { + self.committed[table].push(row); + } } state.in_tx = false; self.active_writer = None; @@ -376,6 +429,17 @@ impl TableOracle { self.add_column(*table, default.clone()); } TableOperation::AddIndex { .. } => {} + TableOperation::AddTable { schema, .. 
} => { + self.add_table(schema.is_event); + } + TableOperation::DropTable { conn, table } => { + self.drop_table(*conn, *table); + } + TableOperation::TruncateTable { conn, table } => { + self.truncate(*conn, *table); + } + TableOperation::Reopen { .. } => {} + TableOperation::VerifyTables { .. } => {} TableOperation::PointLookup { .. } | TableOperation::PredicateCount { .. } | TableOperation::RangeScan { .. } @@ -526,11 +590,14 @@ impl TableOracle { rows } - pub fn committed_rows(mut self) -> Vec> { - for table_rows in &mut self.committed { - table_rows.sort_by_key(|row| row.id().unwrap_or_default()); - } - self.committed + pub fn table_count(&self) -> usize { + self.committed.len() + } + + pub fn committed_rows_for_table(&self, table: usize) -> Vec { + let mut rows = self.committed[table].clone(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + rows } fn insert(&mut self, conn: SessionId, table: usize, row: SimRow) { @@ -594,6 +661,30 @@ impl TableOracle { } } } + + pub fn add_table(&mut self, is_event: bool) { + self.committed.push(Vec::new()); + self.is_event.push(is_event); + for conn in &mut self.connections { + if let Some(snapshot) = &mut conn.read_snapshot { + snapshot.push(Vec::new()); + } + } + } + + fn truncate(&mut self, conn: SessionId, table: usize) { + self.committed[table].clear(); + let state = &mut self.connections[conn.as_index()]; + state.staged_inserts.retain(|(t, _)| *t != table); + state.staged_deletes.retain(|(t, _)| *t != table); + if let Some(snapshot) = &mut state.read_snapshot { + snapshot[table].clear(); + } + } + + fn drop_table(&mut self, conn: SessionId, table: usize) { + self.truncate(conn, table); + } } fn bound_contains_lower(bound: &Bound, key: &AlgebraicValue) -> bool { diff --git a/crates/dst/src/workload/table_ops/scenarios/mod.rs b/crates/dst/src/workload/table_ops/scenarios/mod.rs index 4619473dc36..ef33a353310 100644 --- a/crates/dst/src/workload/table_ops/scenarios/mod.rs +++ 
b/crates/dst/src/workload/table_ops/scenarios/mod.rs @@ -13,6 +13,27 @@ pub enum TableScenarioId { RandomCrud, } +impl TableScenarioId { + pub fn parse(value: &str) -> anyhow::Result { + match value { + "random-crud" => Ok(Self::RandomCrud), + _ => anyhow::bail!("unsupported scenario: {value}; expected: random-crud"), + } + } + + pub const fn as_str(self) -> &'static str { + match self { + Self::RandomCrud => "random-crud", + } + } +} + +impl std::fmt::Display for TableScenarioId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + impl TableScenario for RandomCrudScenario { fn generate_schema(&self, rng: &Rng) -> SchemaPlan { random_crud::generate_schema(rng) diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs index 5864592e0e6..2ccf9a4d498 100644 --- a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -34,6 +34,9 @@ struct TableWorkloadProfile { unique_key_conflict_insert_pct: usize, add_column_pct: usize, add_index_pct: usize, + add_table_pct: usize, + add_event_table_pct: usize, + truncate_table_pct: usize, } const RANDOM_CRUD_PROFILE: TableWorkloadProfile = TableWorkloadProfile { @@ -58,6 +61,9 @@ const RANDOM_CRUD_PROFILE: TableWorkloadProfile = TableWorkloadProfile { unique_key_conflict_insert_pct: 4, add_column_pct: 1, add_index_pct: 2, + add_table_pct: 2, + add_event_table_pct: 1, + truncate_table_pct: 3, }; pub fn generate_schema(rng: &Rng) -> SchemaPlan { @@ -116,6 +122,7 @@ fn generate_schema_with_profile(rng: &Rng, profile: TableWorkloadProfile) -> Sch name: format!("dst_table_{table_idx}_{}", rng.next_u64() % 10_000), columns, extra_indexes, + is_event: false, }); } @@ -197,6 +204,22 @@ fn fill_pending_with_profile(planner: &mut ScenarioPlanner<'_>, conn: SessionId, { return; } + if planner.active_writer().is_none() + && 
!planner.any_read_tx() + && planner.roll_percent(profile.add_table_pct + profile.add_event_table_pct) + && emit_add_table(planner, conn, profile) + { + return; + } + if !visible_rows.is_empty() + && planner.active_writer().is_none() + && !planner.any_read_tx() + && planner.roll_percent(profile.truncate_table_pct) + { + planner.truncate(conn, table); + planner.push_interaction(TableWorkloadInteraction::truncate_table(conn, table)); + return; + } if emit_query(planner, conn, table, &visible_rows) { return; } @@ -304,6 +327,38 @@ fn emit_add_index(planner: &mut ScenarioPlanner<'_>, conn: SessionId, table: usi true } +fn emit_add_table(planner: &mut ScenarioPlanner<'_>, conn: SessionId, profile: TableWorkloadProfile) -> bool { + let is_event = planner + .roll_percent(profile.add_event_table_pct * 100 / (profile.add_table_pct + profile.add_event_table_pct).max(1)); + let extra_cols = 1 + planner.choose_index(3); + let mut columns = vec![ColumnPlan { + name: "id".into(), + ty: AlgebraicType::U64, + }]; + for col_idx in 0..extra_cols { + let ty = match planner.choose_index(4) { + 0 => AlgebraicType::Bool, + 1 => AlgebraicType::U64, + 2 => AlgebraicType::String, + _ => generate_supported_type_for_churn(planner), + }; + columns.push(ColumnPlan { + name: format!("c_add_{}_{}", planner.choose_index(1000), col_idx), + ty, + }); + } + let table_name = format!("dst_added_{}", planner.choose_index(10_000)); + let schema = TablePlan { + name: table_name, + columns, + extra_indexes: vec![], + is_event, + }; + planner.add_table(&schema, is_event); + planner.push_interaction(TableWorkloadInteraction::add_table(conn, schema)); + true +} + fn emit_unique_key_conflict_insert( planner: &mut ScenarioPlanner<'_>, conn: SessionId, diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs index 6b589b0cdaf..27cb76918e1 100644 --- a/crates/dst/src/workload/table_ops/types.rs +++ b/crates/dst/src/workload/table_ops/types.rs @@ -4,7 +4,9 @@ use 
spacetimedb_sats::AlgebraicValue; use crate::{ client::SessionId, - schema::{ColumnPlan, SchemaPlan, SimRow}, + config::{CommitlogFaultProfile, StorageFaultSummary}, + core::{RunOutcome, RunStats}, + schema::{ColumnPlan, SchemaPlan, SimRow, TablePlan}, sim::Rng, }; @@ -90,6 +92,17 @@ pub enum TableOperation { }, /// Scan all visible rows and compare against the model. FullScan { conn: SessionId, table: usize }, + /// Create a new table dynamically. + AddTable { conn: SessionId, schema: TablePlan }, + /// Drop an existing table. + DropTable { conn: SessionId, table: usize }, + /// Clear all rows from a table. + TruncateTable { conn: SessionId, table: usize }, + /// Close the database and reopen from the commitlog to exercise replay. + Reopen { conn: SessionId }, + /// Full scan every table and compare against the oracle model. + /// Useful right after a reopen to catch replay bugs early. + VerifyTables { conn: SessionId }, } #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -122,6 +135,12 @@ pub enum TableInteractionCase { PredicateCount, RangeScan, FullScan, + AddTable, + AddEventTable, + DropTable, + TruncateTable, + Reopen, + VerifyTables, } impl PlannedInteraction { @@ -264,15 +283,59 @@ impl PlannedInteraction { pub fn full_scan(conn: SessionId, table: usize) -> Self { Self::new(TableOperation::FullScan { conn, table }, TableInteractionCase::FullScan) } + + pub fn add_table(conn: SessionId, schema: TablePlan) -> Self { + let case = if schema.is_event { + TableInteractionCase::AddEventTable + } else { + TableInteractionCase::AddTable + }; + Self::new(TableOperation::AddTable { conn, schema }, case) + } + + pub fn truncate_table(conn: SessionId, table: usize) -> Self { + Self::new( + TableOperation::TruncateTable { conn, table }, + TableInteractionCase::TruncateTable, + ) + } + + pub fn drop_table(conn: SessionId, table: usize) -> Self { + Self::new( + TableOperation::DropTable { conn, table }, + TableInteractionCase::DropTable, + ) + } + + pub fn reopen(conn: 
SessionId) -> Self { + Self::new(TableOperation::Reopen { conn }, TableInteractionCase::Reopen) + } + + pub fn verify_tables(conn: SessionId) -> Self { + Self::new( + TableOperation::VerifyTables { conn }, + TableInteractionCase::VerifyTables, + ) + } } /// Final state gathered from a table-workload engine after execution ends. #[derive(Clone, Debug, Eq, PartialEq)] pub struct TableWorkloadOutcome { + /// Interactions executed by the shared runner. + pub interactions_executed: usize, + /// Commitlog fault profile used for this run. + pub commitlog_fault_profile: CommitlogFaultProfile, + /// Faults injected while executing this run. + pub commitlog_fault_summary: StorageFaultSummary, /// Row count for each table in schema order. pub final_row_counts: Vec, - /// Full committed rows for each table in schema order. - pub final_rows: Vec>, +} + +impl RunOutcome for TableWorkloadOutcome { + fn record_run_stats(&mut self, stats: RunStats) { + self.interactions_executed = stats.interactions_executed; + } } /// Per-session write transaction bookkeeping shared by locking targets. diff --git a/crates/engine/src/metrics.rs b/crates/engine/src/metrics.rs index f5ee0011b23..f74fbab2a6c 100644 --- a/crates/engine/src/metrics.rs +++ b/crates/engine/src/metrics.rs @@ -16,6 +16,9 @@ metrics_group!( #[name = spacetime_replay_total_time_seconds] #[help = "Total time spent replaying a database upon restart, including snapshot read, snapshot restore and commitlog replay"] #[labels(db: Identity)] + // We expect a small number of observations per label + // (exactly one, for non-replicated databases, and one per leader change for replicated databases) + // so we'll just store a `Gauge` with the most recent observation for each database. 
pub replay_total_time_seconds: GaugeVec, #[name = spacetime_replay_snapshot_read_time_seconds] @@ -70,6 +73,8 @@ metrics_group!( #[name = spacetime_snapshot_compression_time_total_sec] #[help = "The time (in seconds) it took to do a compression pass on the snapshot repository, including scheduling overhead"] #[labels(db: Identity)] + // Not sure what range to expect, but certainly slower than snapshot + // creation. #[buckets(0.001, 0.01, 0.1, 1.0, 5.0, 10.0)] pub snapshot_compression_time_total: HistogramVec, @@ -143,6 +148,7 @@ impl ExecutionCounters { } } + /// Update the global system metrics with transaction-level execution metrics. pub fn record(&self, metrics: &ExecutionMetrics) { if metrics.index_seeks > 0 { self.rdb_num_index_seeks.inc_by(metrics.index_seeks as u64); diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 8093ca61ca4..c6be69ab209 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,8 +1,3 @@ -#[cfg(all(feature = "tokio", feature = "simulation"))] -compile_error!( - "spacetimedb-runtime requires exactly one runtime backend: enable either `tokio` or `simulation`, not both" -); - #[cfg(not(any(feature = "tokio", feature = "simulation")))] compile_error!("spacetimedb-runtime requires exactly one runtime backend: enable either `tokio` or `simulation`");