From 7c86ff6c37cc93cca030bde060c41b8184b2cd61 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Mar 2024 16:49:14 +0100 Subject: [disable-scrub] implement a `disable_scrub` configuration option --- src/block/manager.rs | 68 ++++++++++++++++++++++++++++------------------------ src/model/garage.rs | 9 +------ src/util/config.rs | 4 ++++ 3 files changed, 42 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/block/manager.rs b/src/block/manager.rs index 890ea8b7..ef7279e9 100644 --- a/src/block/manager.rs +++ b/src/block/manager.rs @@ -22,7 +22,7 @@ use garage_net::stream::{read_stream_to_end, stream_asyncread, ByteStream}; use garage_db as db; use garage_util::background::{vars, BackgroundRunner}; -use garage_util::config::DataDirEnum; +use garage_util::config::Config; use garage_util::data::*; use garage_util::error::*; use garage_util::metrics::RecordDuration; @@ -84,6 +84,7 @@ pub struct BlockManager { data_fsync: bool, compression_level: Option, + disable_scrub: bool, mutation_lock: Vec>, @@ -119,9 +120,7 @@ struct BlockManagerLocked(); impl BlockManager { pub fn new( db: &db::Db, - data_dir: DataDirEnum, - data_fsync: bool, - compression_level: Option, + config: &Config, replication: TableShardedReplication, system: Arc, ) -> Result, Error> { @@ -131,11 +130,13 @@ impl BlockManager { let data_layout = match data_layout_persister.load() { Ok(mut layout) => { layout - .update(&data_dir) + .update(&config.data_dir) .ok_or_message("invalid data_dir config")?; layout } - Err(_) => DataLayout::initialize(&data_dir).ok_or_message("invalid data_dir config")?, + Err(_) => { + DataLayout::initialize(&config.data_dir).ok_or_message("invalid data_dir config")? + } }; data_layout_persister .save(&data_layout) @@ -154,7 +155,7 @@ impl BlockManager { .endpoint("garage_block/manager.rs/Rpc".to_string()); let metrics = BlockManagerMetrics::new( - compression_level, + config.compression_level, rc.rc.clone(), resync.queue.clone(), resync.errors.clone(), @@ -166,8 +167,9 @@ impl BlockManager { replication, data_layout: ArcSwap::new(Arc::new(data_layout)), data_layout_persister, - data_fsync, - compression_level, + data_fsync: config.data_fsync, + disable_scrub: config.disable_scrub, + compression_level: config.compression_level, mutation_lock: vec![(); MUTEX_COUNT] .iter() .map(|_| Mutex::new(BlockManagerLocked())) @@ -194,33 +196,37 @@ impl BlockManager { } // Spawn scrub worker - let (scrub_tx, scrub_rx) = mpsc::channel(1); - self.tx_scrub_command.store(Some(Arc::new(scrub_tx))); - bg.spawn_worker(ScrubWorker::new( - self.clone(), - scrub_rx, - self.scrub_persister.clone(), - )); + if !self.disable_scrub { + let (scrub_tx, scrub_rx) = mpsc::channel(1); + self.tx_scrub_command.store(Some(Arc::new(scrub_tx))); + bg.spawn_worker(ScrubWorker::new( + self.clone(), + scrub_rx, + self.scrub_persister.clone(), + )); + } } pub fn register_bg_vars(&self, vars: &mut vars::BgVars) { self.resync.register_bg_vars(vars); - vars.register_rw( - &self.scrub_persister, - "scrub-tranquility", - |p| p.get_with(|x| x.tranquility), - |p, tranquility| p.set_with(|x| x.tranquility = tranquility), - ); - vars.register_ro(&self.scrub_persister, "scrub-last-completed", |p| { - p.get_with(|x| msec_to_rfc3339(x.time_last_complete_scrub)) - }); - vars.register_ro(&self.scrub_persister, "scrub-next-run", |p| { - p.get_with(|x| msec_to_rfc3339(x.time_next_run_scrub)) - }); - vars.register_ro(&self.scrub_persister, "scrub-corruptions_detected", |p| { - p.get_with(|x| x.corruptions_detected) - }); + if !self.disable_scrub { + vars.register_rw( + &self.scrub_persister, + "scrub-tranquility", + |p| p.get_with(|x| x.tranquility), + |p, tranquility| p.set_with(|x| x.tranquility = tranquility), + ); + vars.register_ro(&self.scrub_persister, "scrub-last-completed", |p| { + p.get_with(|x| msec_to_rfc3339(x.time_last_complete_scrub)) + }); + vars.register_ro(&self.scrub_persister, "scrub-next-run", |p| { + p.get_with(|x| msec_to_rfc3339(x.time_next_run_scrub)) + }); + vars.register_ro(&self.scrub_persister, "scrub-corruptions_detected", |p| { + p.get_with(|x| x.corruptions_detected) + }); + } } /// Ask nodes that might have a (possibly compressed) block for it diff --git a/src/model/garage.rs b/src/model/garage.rs index 18421ca3..acf943f6 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -177,14 +177,7 @@ impl Garage { }; info!("Initialize block manager..."); - let block_manager = BlockManager::new( - &db, - config.data_dir.clone(), - config.data_fsync, - config.compression_level, - data_rep_param, - system.clone(), - )?; + let block_manager = BlockManager::new(&db, &config, data_rep_param, system.clone())?; block_manager.register_bg_vars(&mut bg_vars); // ---- admin tables ---- diff --git a/src/util/config.rs b/src/util/config.rs index 056c625d..7338a506 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -23,6 +23,10 @@ pub struct Config { #[serde(default)] pub data_fsync: bool, + /// Disable automatic scrubbing of the data directory + #[serde(default)] + pub disable_scrub: bool, + /// Size of data blocks to save to disk #[serde( deserialize_with = "deserialize_capacity", -- cgit v1.2.3 From 8dff278b7227007d8a2c2f6c6d6f76c0c5d7a1ac Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Mar 2024 17:24:53 +0100 Subject: [db-snapshot] Implement db snapshotting logic in garage_db --- src/db/Cargo.toml | 2 +- src/db/lib.rs | 12 ++++++++++++ src/db/lmdb_adapter.rs | 10 ++++++++++ src/db/sled_adapter.rs | 8 ++++++++ src/db/sqlite_adapter.rs | 12 ++++++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/db/Cargo.toml b/src/db/Cargo.toml index d7c89620..324de74c 100644 --- a/src/db/Cargo.toml +++ b/src/db/Cargo.toml @@ -17,7 +17,7 @@ hexdump.workspace = true tracing.workspace = true heed = { workspace = true, optional = true } -rusqlite = { workspace = true, optional = true } +rusqlite = { workspace = true, optional = true, features = ["backup"] } sled = { workspace = true, optional = true } [dev-dependencies] diff --git a/src/db/lib.rs b/src/db/lib.rs index 0fb457ce..7f19172f 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -19,6 +19,7 @@ use core::ops::{Bound, RangeBounds}; use std::borrow::Cow; use std::cell::Cell; +use std::path::PathBuf; use std::sync::Arc; use err_derive::Error; @@ -48,6 +49,12 @@ pub type TxValueIter<'a> = Box); +impl From for Error { + fn from(e: std::io::Error) -> Error { + Error(format!("IO: {}", e).into()) + } +} + pub type Result = std::result::Result; #[derive(Debug, Error)] @@ -129,6 +136,10 @@ impl Db { } } + pub fn snapshot(&self, path: &PathBuf) -> Result<()> { + self.0.snapshot(path) + } + pub fn import(&self, other: &Db) -> Result<()> { let existing_trees = self.list_trees()?; if !existing_trees.is_empty() { @@ -325,6 +336,7 @@ pub(crate) trait IDb: Send + Sync { fn engine(&self) -> String; fn open_tree(&self, name: &str) -> Result; fn list_trees(&self) -> Result>; + fn snapshot(&self, path: &PathBuf) -> Result<()>; fn get(&self, tree: usize, key: &[u8]) -> Result>; fn len(&self, tree: usize) -> Result; diff --git a/src/db/lmdb_adapter.rs b/src/db/lmdb_adapter.rs index 59fa132d..4b131aff 100644 --- a/src/db/lmdb_adapter.rs +++ b/src/db/lmdb_adapter.rs @@ -3,6 +3,7 @@ use core::ptr::NonNull; use std::collections::HashMap; use std::convert::TryInto; +use std::path::PathBuf; use std::sync::{Arc, RwLock}; use heed::types::ByteSlice; @@ -102,6 +103,15 @@ impl IDb for LmdbDb { Ok(ret2) } + fn snapshot(&self, to: &PathBuf) -> Result<()> { + std::fs::create_dir_all(to)?; + let mut path = to.clone(); + path.push("data.mdb"); + self.db + .copy_to_path(path, heed::CompactionOption::Disabled)?; + Ok(()) + } + // ---- fn get(&self, tree: usize, key: &[u8]) -> Result> { diff --git a/src/db/sled_adapter.rs b/src/db/sled_adapter.rs index 84f2001b..c34b4d81 100644 --- a/src/db/sled_adapter.rs +++ b/src/db/sled_adapter.rs @@ -2,6 +2,7 @@ use core::ops::Bound; use std::cell::Cell; use std::collections::HashMap; +use std::path::PathBuf; use std::sync::{Arc, RwLock}; use sled::transaction::{ @@ -96,6 +97,13 @@ impl IDb for SledDb { Ok(trees) } + fn snapshot(&self, to: &PathBuf) -> Result<()> { + let to_db = sled::open(to)?; + let export = self.db.export(); + to_db.import(export); + Ok(()) + } + // ---- fn get(&self, tree: usize, key: &[u8]) -> Result> { diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs index 9f967c66..827f3cc3 100644 --- a/src/db/sqlite_adapter.rs +++ b/src/db/sqlite_adapter.rs @@ -2,6 +2,7 @@ use core::ops::Bound; use std::borrow::BorrowMut; use std::marker::PhantomPinned; +use std::path::PathBuf; use std::pin::Pin; use std::ptr::NonNull; use std::sync::{Arc, Mutex, MutexGuard}; @@ -119,6 +120,17 @@ impl IDb for SqliteDb { Ok(trees) } + fn snapshot(&self, to: &PathBuf) -> Result<()> { + fn progress(p: rusqlite::backup::Progress) { + let percent = (p.pagecount - p.remaining) * 100 / p.pagecount; + info!("Sqlite snapshot progres: {}%", percent); + } + let this = self.0.lock().unwrap(); + this.db + .backup(rusqlite::DatabaseName::Main, to, Some(progress))?; + Ok(()) + } + // ---- fn get(&self, tree: usize, key: &[u8]) -> Result> { -- cgit v1.2.3 From 1e42808a59c35aae297d9dfd1c166c09f0b99347 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Mar 2024 18:21:37 +0100 Subject: [db-snapshot] implement meta_auto_snapshot_interval --- src/garage/server.rs | 2 +- src/model/Cargo.toml | 1 + src/model/garage.rs | 19 ++++++- src/model/lib.rs | 1 + src/model/snapshot.rs | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/util/config.rs | 4 ++ 6 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 src/model/snapshot.rs (limited to 'src') diff --git a/src/garage/server.rs b/src/garage/server.rs index 6323f957..65bf34db 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -51,7 +51,7 @@ pub async fn run_server(config_file: PathBuf, secrets: Secrets) -> Result<(), Er let (background, await_background_done) = BackgroundRunner::new(watch_cancel.clone()); info!("Spawning Garage workers..."); - garage.spawn_workers(&background); + garage.spawn_workers(&background)?; if config.admin.trace_sink.is_some() { info!("Initialize tracing..."); diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml index 2e5b047d..bde354b5 100644 --- a/src/model/Cargo.toml +++ b/src/model/Cargo.toml @@ -28,6 +28,7 @@ chrono.workspace = true err-derive.workspace = true hex.workspace = true base64.workspace = true +parse_duration.workspace = true tracing.workspace = true rand.workspace = true zstd.workspace = true diff --git a/src/model/garage.rs b/src/model/garage.rs index acf943f6..a6f60546 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -278,7 +278,7 @@ impl Garage { })) } - pub fn spawn_workers(self: &Arc, bg: &BackgroundRunner) { + pub fn spawn_workers(self: &Arc, bg: &BackgroundRunner) -> Result<(), Error> { self.block_manager.spawn_workers(bg); self.bucket_table.spawn_workers(bg); @@ -299,6 +299,23 @@ impl Garage { #[cfg(feature = "k2v")] self.k2v.spawn_workers(bg); + + if let Some(itv) = self.config.metadata_auto_snapshot_interval.as_deref() { + let interval = parse_duration::parse(itv) + .ok_or_message("Invalid `metadata_auto_snapshot_interval`")?; + if interval < std::time::Duration::from_secs(600) { + return Err(Error::Message( + "metadata_auto_snapshot_interval too small or negative".into(), + )); + } + + bg.spawn_worker(crate::snapshot::AutoSnapshotWorker::new( + self.clone(), + interval, + )); + } + + Ok(()) } pub fn bucket_helper(&self) -> helper::bucket::BucketHelper { diff --git a/src/model/lib.rs b/src/model/lib.rs index 4f20ea46..8ec338da 100644 --- a/src/model/lib.rs +++ b/src/model/lib.rs @@ -19,3 +19,4 @@ pub mod s3; pub mod garage; pub mod helper; pub mod migrate; +pub mod snapshot; diff --git a/src/model/snapshot.rs b/src/model/snapshot.rs new file mode 100644 index 00000000..36f9ec7d --- /dev/null +++ b/src/model/snapshot.rs @@ -0,0 +1,136 @@ +use std::fs; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::Mutex; +use std::time::{Duration, Instant}; + +use async_trait::async_trait; +use rand::prelude::*; +use tokio::sync::watch; + +use garage_util::background::*; +use garage_util::error::*; + +use crate::garage::Garage; + +// The two most recent snapshots are kept +const KEEP_SNAPSHOTS: usize = 2; + +static SNAPSHOT_MUTEX: Mutex<()> = Mutex::new(()); + +// ================ snapshotting logic ===================== + +/// Run snashot_metadata in a blocking thread and async await on it +pub async fn async_snapshot_metadata(garage: &Arc) -> Result<(), Error> { + let garage = garage.clone(); + let worker = tokio::task::spawn_blocking(move || snapshot_metadata(&garage)); + worker.await.unwrap()?; + Ok(()) +} + +/// Take a snapshot of the metadata database, and erase older +/// snapshots if necessary. +/// This is not an async function, it should be spawned on a thread pool +pub fn snapshot_metadata(garage: &Garage) -> Result<(), Error> { + let lock = match SNAPSHOT_MUTEX.try_lock() { + Ok(lock) => lock, + Err(_) => { + return Err(Error::Message( + "Cannot acquire lock, another snapshot might be in progress".into(), + )) + } + }; + + let mut snapshots_dir = garage.config.metadata_dir.clone(); + snapshots_dir.push("snapshots"); + fs::create_dir_all(&snapshots_dir)?; + + let mut new_path = snapshots_dir.clone(); + new_path.push(chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true)); + + info!("Snapshotting metadata db to {}", new_path.display()); + garage.db.snapshot(&new_path)?; + info!("Metadata db snapshot finished"); + + if let Err(e) = cleanup_snapshots(&snapshots_dir) { + error!("Failed to do cleanup in snapshots directory: {}", e); + } + + drop(lock); + + Ok(()) +} + +fn cleanup_snapshots(snapshots_dir: &PathBuf) -> Result<(), Error> { + let mut snapshots = + fs::read_dir(&snapshots_dir)?.collect::, std::io::Error>>()?; + + snapshots.retain(|x| x.file_name().len() > 8); + snapshots.sort_by_key(|x| x.file_name()); + + for to_delete in snapshots.iter().rev().skip(KEEP_SNAPSHOTS) { + let path = snapshots_dir.join(to_delete.path()); + if to_delete.metadata()?.file_type().is_dir() { + for file in fs::read_dir(&path)? { + let file = file?; + if file.metadata()?.is_file() { + fs::remove_file(path.join(file.path()))?; + } + } + std::fs::remove_dir(&path)?; + } else { + std::fs::remove_file(&path)?; + } + } + Ok(()) +} + +// ================ auto snapshot worker ===================== + +pub struct AutoSnapshotWorker { + garage: Arc, + next_snapshot: Instant, + snapshot_interval: Duration, +} + +impl AutoSnapshotWorker { + pub(crate) fn new(garage: Arc, snapshot_interval: Duration) -> Self { + Self { + garage, + snapshot_interval, + next_snapshot: Instant::now() + (snapshot_interval / 2), + } + } +} + +#[async_trait] +impl Worker for AutoSnapshotWorker { + fn name(&self) -> String { + "Metadata snapshot worker".into() + } + fn status(&self) -> WorkerStatus { + WorkerStatus { + freeform: vec![format!( + "Next snapshot: {}", + (chrono::Utc::now() + (self.next_snapshot - Instant::now())).to_rfc3339() + )], + ..Default::default() + } + } + async fn work(&mut self, _must_exit: &mut watch::Receiver) -> Result { + if Instant::now() < self.next_snapshot { + return Ok(WorkerState::Idle); + } + + async_snapshot_metadata(&self.garage).await?; + + let rand_factor = 1f32 + thread_rng().gen::() / 5f32; + self.next_snapshot = Instant::now() + self.snapshot_interval.mul_f32(rand_factor); + + Ok(WorkerState::Idle) + } + async fn wait_for_work(&mut self) -> WorkerState { + tokio::time::sleep_until(self.next_snapshot.into()).await; + WorkerState::Busy + } +} diff --git a/src/util/config.rs b/src/util/config.rs index 7338a506..8ecbdfbb 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -27,6 +27,10 @@ pub struct Config { #[serde(default)] pub disable_scrub: bool, + /// Automatic snapshot interval for metadata + #[serde(default)] + pub metadata_auto_snapshot_interval: Option, + /// Size of data blocks to save to disk #[serde( deserialize_with = "deserialize_capacity", -- cgit v1.2.3 From a68c37555d15bb19a10f74c7ee85485a5228ab66 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 15 Mar 2024 10:56:57 +0100 Subject: [db-snapshot] add garage meta snapshot cli operation --- src/garage/admin/mod.rs | 40 ++++++++++++++++++++++++++++++++++++++++ src/garage/cli/cmd.rs | 3 +++ src/garage/cli/structs.rs | 15 +++++++++++++++ 3 files changed, 58 insertions(+) (limited to 'src') diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index b6f9c426..f01ef3d6 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -46,6 +46,7 @@ pub enum AdminRpc { Stats(StatsOpt), Worker(WorkerOperation), BlockOperation(BlockOperation), + MetaOperation(MetaOperation), // Replies Ok(String), @@ -518,6 +519,44 @@ impl AdminRpcHandler { )])) } } + + // ================ META DB COMMANDS ==================== + + async fn handle_meta_cmd(self: &Arc, mo: &MetaOperation) -> Result { + match mo { + MetaOperation::Snapshot { all: true } => { + let ring = self.garage.system.ring.borrow().clone(); + let to = ring.layout.node_ids().to_vec(); + + let resps = futures::future::join_all(to.iter().map(|to| async move { + let to = (*to).into(); + self.endpoint + .call( + &to, + AdminRpc::MetaOperation(MetaOperation::Snapshot { all: false }), + PRIO_NORMAL, + ) + .await + })) + .await; + + let mut ret = vec![]; + for (to, resp) in to.iter().zip(resps.iter()) { + let res_str = match resp { + Ok(_) => "ok".to_string(), + Err(e) => format!("error: {}", e), + }; + ret.push(format!("{:?}\t{}", to, res_str)); + } + + Ok(AdminRpc::Ok(format_table_to_string(ret))) + } + MetaOperation::Snapshot { all: false } => { + garage_model::snapshot::async_snapshot_metadata(&self.garage).await?; + Ok(AdminRpc::Ok("Snapshot has been saved.".into())) + } + } + } } #[async_trait] @@ -535,6 +574,7 @@ impl EndpointHandler for AdminRpcHandler { AdminRpc::Stats(opt) => self.handle_stats(opt.clone()).await, AdminRpc::Worker(wo) => self.handle_worker_cmd(wo).await, AdminRpc::BlockOperation(bo) => self.handle_block_cmd(bo).await, + AdminRpc::MetaOperation(mo) => self.handle_meta_cmd(mo).await, m => Err(GarageError::unexpected_rpc_message(m).into()), } } diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index 48359614..4c0a5322 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -44,6 +44,9 @@ pub async fn cli_command_dispatch( Command::Block(bo) => { cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BlockOperation(bo)).await } + Command::Meta(mo) => { + cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::MetaOperation(mo)).await + } _ => unreachable!(), } } diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs index be4d5bd6..51d2bed3 100644 --- a/src/garage/cli/structs.rs +++ b/src/garage/cli/structs.rs @@ -57,6 +57,10 @@ pub enum Command { #[structopt(name = "block", version = garage_version())] Block(BlockOperation), + /// Operations on the metadata db + #[structopt(name = "meta", version = garage_version())] + Meta(MetaOperation), + /// Convert metadata db between database engine formats #[structopt(name = "convert-db", version = garage_version())] ConvertDb(convert_db::ConvertDbOpt), @@ -617,3 +621,14 @@ pub enum BlockOperation { blocks: Vec, }, } + +#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone, Copy)] +pub enum MetaOperation { + /// Save a snapshot of the metadata db file + #[structopt(name = "snapshot", version = garage_version())] + Snapshot { + /// Run on all nodes instead of only local node + #[structopt(long = "all")] + all: bool, + }, +} -- cgit v1.2.3 From e8f9718ccd656dacc4fba2ed9fa5d8abf12ad37b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 18 Mar 2024 17:08:54 +0100 Subject: [sqlite-r2d2] implement connection pooling in sqlite backend --- src/db/Cargo.toml | 4 +- src/db/open.rs | 10 +-- src/db/sqlite_adapter.rs | 204 ++++++++++++++++++++++------------------------- src/db/test.rs | 3 +- 4 files changed, 103 insertions(+), 118 deletions(-) (limited to 'src') diff --git a/src/db/Cargo.toml b/src/db/Cargo.toml index 324de74c..baa94bae 100644 --- a/src/db/Cargo.toml +++ b/src/db/Cargo.toml @@ -18,6 +18,8 @@ tracing.workspace = true heed = { workspace = true, optional = true } rusqlite = { workspace = true, optional = true, features = ["backup"] } +r2d2 = { workspace = true, optional = true } +r2d2_sqlite = { workspace = true, optional = true } sled = { workspace = true, optional = true } [dev-dependencies] @@ -27,4 +29,4 @@ mktemp.workspace = true default = [ "sled", "lmdb", "sqlite" ] bundled-libs = [ "rusqlite?/bundled" ] lmdb = [ "heed" ] -sqlite = [ "rusqlite" ] +sqlite = [ "rusqlite", "r2d2", "r2d2_sqlite" ] diff --git a/src/db/open.rs b/src/db/open.rs index ae135c4e..59d06f2e 100644 --- a/src/db/open.rs +++ b/src/db/open.rs @@ -91,14 +91,8 @@ pub fn open_db(path: &PathBuf, engine: Engine, opt: &OpenOpt) -> Result { #[cfg(feature = "sqlite")] Engine::Sqlite => { info!("Opening Sqlite database at: {}", path.display()); - let db = crate::sqlite_adapter::rusqlite::Connection::open(&path)?; - db.pragma_update(None, "journal_mode", "WAL")?; - if opt.fsync { - db.pragma_update(None, "synchronous", "NORMAL")?; - } else { - db.pragma_update(None, "synchronous", "OFF")?; - } - Ok(crate::sqlite_adapter::SqliteDb::init(db)) + let manager = r2d2_sqlite::SqliteConnectionManager::file(path); + Ok(crate::sqlite_adapter::SqliteDb::new(manager, opt.fsync)?) } // ---- LMDB DB ---- diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs index 827f3cc3..3eccfdde 100644 --- a/src/db/sqlite_adapter.rs +++ b/src/db/sqlite_adapter.rs @@ -1,13 +1,14 @@ use core::ops::Bound; -use std::borrow::BorrowMut; use std::marker::PhantomPinned; use std::path::PathBuf; use std::pin::Pin; use std::ptr::NonNull; -use std::sync::{Arc, Mutex, MutexGuard}; +use std::sync::{Arc, Mutex, RwLock}; -use rusqlite::{params, Connection, Rows, Statement, Transaction}; +use r2d2::Pool; +use r2d2_sqlite::SqliteConnectionManager; +use rusqlite::{params, Rows, Statement, Transaction}; use crate::{ Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult, @@ -16,6 +17,8 @@ use crate::{ pub use rusqlite; +type Connection = r2d2::PooledConnection; + // --- err impl From for Error { @@ -24,6 +27,12 @@ impl From for Error { } } +impl From for Error { + fn from(e: r2d2::Error) -> Error { + Error(format!("Sqlite: {}", e).into()) + } +} + impl From for TxOpError { fn from(e: rusqlite::Error) -> TxOpError { TxOpError(e.into()) @@ -32,35 +41,47 @@ impl From for TxOpError { // -- db -pub struct SqliteDb(Mutex); - -struct SqliteDbInner { - db: Connection, - trees: Vec, +pub struct SqliteDb { + db: Pool, + trees: RwLock>>, + // All operations that might write on the DB must take this lock first. + // This emulates LMDB's approach where a single writer can be + // active at once. + write_lock: Mutex<()>, } impl SqliteDb { - pub fn init(db: rusqlite::Connection) -> Db { - let s = Self(Mutex::new(SqliteDbInner { - db, - trees: Vec::new(), - })); - Db(Arc::new(s)) + pub fn new(manager: SqliteConnectionManager, sync_mode: bool) -> Result { + let manager = manager.with_init(move |db| { + db.pragma_update(None, "journal_mode", "WAL")?; + if sync_mode { + db.pragma_update(None, "synchronous", "NORMAL")?; + } else { + db.pragma_update(None, "synchronous", "OFF")?; + } + Ok(()) + }); + let s = Self { + db: Pool::builder().build(manager)?, + trees: RwLock::new(vec![]), + write_lock: Mutex::new(()), + }; + Ok(Db(Arc::new(s))) } } -impl SqliteDbInner { - fn get_tree(&self, i: usize) -> Result<&'_ str> { +impl SqliteDb { + fn get_tree(&self, i: usize) -> Result> { self.trees + .read() + .unwrap() .get(i) - .map(String::as_str) + .cloned() .ok_or_else(|| Error("invalid tree id".into())) } - fn internal_get(&self, tree: &str, key: &[u8]) -> Result> { - let mut stmt = self - .db - .prepare(&format!("SELECT v FROM {} WHERE k = ?1", tree))?; + fn internal_get(&self, db: &Connection, tree: &str, key: &[u8]) -> Result> { + let mut stmt = db.prepare(&format!("SELECT v FROM {} WHERE k = ?1", tree))?; let mut res_iter = stmt.query([key])?; match res_iter.next()? { None => Ok(None), @@ -76,13 +97,14 @@ impl IDb for SqliteDb { fn open_tree(&self, name: &str) -> Result { let name = format!("tree_{}", name.replace(':', "_COLON_")); - let mut this = self.0.lock().unwrap(); + let mut trees = self.trees.write().unwrap(); - if let Some(i) = this.trees.iter().position(|x| x == &name) { + if let Some(i) = trees.iter().position(|x| x.as_ref() == &name) { Ok(i) } else { + let db = self.db.get()?; trace!("create table {}", name); - this.db.execute( + db.execute( &format!( "CREATE TABLE IF NOT EXISTS {} ( k BLOB PRIMARY KEY, @@ -94,8 +116,8 @@ impl IDb for SqliteDb { )?; trace!("table created: {}, unlocking", name); - let i = this.trees.len(); - this.trees.push(name.to_string()); + let i = trees.len(); + trees.push(name.to_string().into_boxed_str().into()); Ok(i) } } @@ -103,11 +125,8 @@ impl IDb for SqliteDb { fn list_trees(&self) -> Result> { let mut trees = vec![]; - trace!("list_trees: lock db"); - let this = self.0.lock().unwrap(); - trace!("list_trees: lock acquired"); - - let mut stmt = this.db.prepare( + let db = self.db.get()?; + let mut stmt = db.prepare( "SELECT name FROM sqlite_schema WHERE type = 'table' AND name LIKE 'tree_%'", )?; let mut rows = stmt.query([])?; @@ -125,8 +144,8 @@ impl IDb for SqliteDb { let percent = (p.pagecount - p.remaining) * 100 / p.pagecount; info!("Sqlite snapshot progres: {}%", percent); } - let this = self.0.lock().unwrap(); - this.db + self.db + .get()? .backup(rusqlite::DatabaseName::Main, to, Some(progress))?; Ok(()) } @@ -134,21 +153,15 @@ impl IDb for SqliteDb { // ---- fn get(&self, tree: usize, key: &[u8]) -> Result> { - trace!("get {}: lock db", tree); - let this = self.0.lock().unwrap(); - trace!("get {}: lock acquired", tree); - - let tree = this.get_tree(tree)?; - this.internal_get(tree, key) + let tree = self.get_tree(tree)?; + self.internal_get(&self.db.get()?, &tree, key) } fn len(&self, tree: usize) -> Result { - trace!("len {}: lock db", tree); - let this = self.0.lock().unwrap(); - trace!("len {}: lock acquired", tree); + let tree = self.get_tree(tree)?; + let db = self.db.get()?; - let tree = this.get_tree(tree)?; - let mut stmt = this.db.prepare(&format!("SELECT COUNT(*) FROM {}", tree))?; + let mut stmt = db.prepare(&format!("SELECT COUNT(*) FROM {}", tree))?; let mut res_iter = stmt.query([])?; match res_iter.next()? { None => Ok(0), @@ -161,69 +174,60 @@ impl IDb for SqliteDb { } fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result> { - trace!("insert {}: lock db", tree); - let this = self.0.lock().unwrap(); - trace!("insert {}: lock acquired", tree); + let tree = self.get_tree(tree)?; + let db = self.db.get()?; + let lock = self.write_lock.lock(); - let tree = this.get_tree(tree)?; - let old_val = this.internal_get(tree, key)?; + let old_val = self.internal_get(&db, &tree, key)?; let sql = match &old_val { Some(_) => format!("UPDATE {} SET v = ?2 WHERE k = ?1", tree), None => format!("INSERT INTO {} (k, v) VALUES (?1, ?2)", tree), }; - let n = this.db.execute(&sql, params![key, value])?; + let n = db.execute(&sql, params![key, value])?; assert_eq!(n, 1); + drop(lock); Ok(old_val) } fn remove(&self, tree: usize, key: &[u8]) -> Result> { - trace!("remove {}: lock db", tree); - let this = self.0.lock().unwrap(); - trace!("remove {}: lock acquired", tree); + let tree = self.get_tree(tree)?; + let db = self.db.get()?; + let lock = self.write_lock.lock(); - let tree = this.get_tree(tree)?; - let old_val = this.internal_get(tree, key)?; + let old_val = self.internal_get(&db, &tree, key)?; if old_val.is_some() { - let n = this - .db - .execute(&format!("DELETE FROM {} WHERE k = ?1", tree), params![key])?; + let n = db.execute(&format!("DELETE FROM {} WHERE k = ?1", tree), params![key])?; assert_eq!(n, 1); } + drop(lock); Ok(old_val) } fn clear(&self, tree: usize) -> Result<()> { - trace!("clear {}: lock db", tree); - let this = self.0.lock().unwrap(); - trace!("clear {}: lock acquired", tree); + let tree = self.get_tree(tree)?; + let db = self.db.get()?; + let lock = self.write_lock.lock(); + + db.execute(&format!("DELETE FROM {}", tree), [])?; - let tree = this.get_tree(tree)?; - this.db.execute(&format!("DELETE FROM {}", tree), [])?; + drop(lock); Ok(()) } fn iter(&self, tree: usize) -> Result> { - trace!("iter {}: lock db", tree); - let this = self.0.lock().unwrap(); - trace!("iter {}: lock acquired", tree); - - let tree = this.get_tree(tree)?; + let tree = self.get_tree(tree)?; let sql = format!("SELECT k, v FROM {} ORDER BY k ASC", tree); - DbValueIterator::make(this, &sql, []) + DbValueIterator::make(self.db.get()?, &sql, []) } fn iter_rev(&self, tree: usize) -> Result> { - trace!("iter_rev {}: lock db", tree); - let this = self.0.lock().unwrap(); - trace!("iter_rev {}: lock acquired", tree); - - let tree = this.get_tree(tree)?; + let tree = self.get_tree(tree)?; let sql = format!("SELECT k, v FROM {} ORDER BY k DESC", tree); - DbValueIterator::make(this, &sql, []) + DbValueIterator::make(self.db.get()?, &sql, []) } fn range<'r>( @@ -232,11 +236,7 @@ impl IDb for SqliteDb { low: Bound<&'r [u8]>, high: Bound<&'r [u8]>, ) -> Result> { - trace!("range {}: lock db", tree); - let this = self.0.lock().unwrap(); - trace!("range {}: lock acquired", tree); - - let tree = this.get_tree(tree)?; + let tree = self.get_tree(tree)?; let (bounds_sql, params) = bounds_sql(low, high); let sql = format!("SELECT k, v FROM {} {} ORDER BY k ASC", tree, bounds_sql); @@ -246,7 +246,7 @@ impl IDb for SqliteDb { .map(|x| x as &dyn rusqlite::ToSql) .collect::>(); - DbValueIterator::make::<&[&dyn rusqlite::ToSql]>(this, &sql, params.as_ref()) + DbValueIterator::make::<&[&dyn rusqlite::ToSql]>(self.db.get()?, &sql, params.as_ref()) } fn range_rev<'r>( &self, @@ -254,11 +254,7 @@ impl IDb for SqliteDb { low: Bound<&'r [u8]>, high: Bound<&'r [u8]>, ) -> Result> { - trace!("range_rev {}: lock db", tree); - let this = self.0.lock().unwrap(); - trace!("range_rev {}: lock acquired", tree); - - let tree = this.get_tree(tree)?; + let tree = self.get_tree(tree)?; let (bounds_sql, params) = bounds_sql(low, high); let sql = format!("SELECT k, v FROM {} {} ORDER BY k DESC", tree, bounds_sql); @@ -268,25 +264,20 @@ impl IDb for SqliteDb { .map(|x| x as &dyn rusqlite::ToSql) .collect::>(); - DbValueIterator::make::<&[&dyn rusqlite::ToSql]>(this, &sql, params.as_ref()) + DbValueIterator::make::<&[&dyn rusqlite::ToSql]>(self.db.get()?, &sql, params.as_ref()) } // ---- fn transaction(&self, f: &dyn ITxFn) -> TxResult { - trace!("transaction: lock db"); - let mut this = self.0.lock().unwrap(); - trace!("transaction: lock acquired"); - - let this_mut_ref: &mut SqliteDbInner = this.borrow_mut(); + let mut db = self.db.get().map_err(Error::from).map_err(TxError::Db)?; + let trees = self.trees.read().unwrap(); + let lock = self.write_lock.lock(); + trace!("trying transaction"); let mut tx = SqliteTx { - tx: this_mut_ref - .db - .transaction() - .map_err(Error::from) - .map_err(TxError::Db)?, - trees: &this_mut_ref.trees, + tx: db.transaction().map_err(Error::from).map_err(TxError::Db)?, + trees: &trees, }; let res = match f.try_on(&mut tx) { TxFnResult::Ok(on_commit) => { @@ -306,7 +297,8 @@ impl IDb for SqliteDb { }; trace!("transaction done"); - res + drop(lock); + return res; } } @@ -314,12 +306,12 @@ impl IDb for SqliteDb { struct SqliteTx<'a> { tx: Transaction<'a>, - trees: &'a [String], + trees: &'a [Arc], } impl<'a> SqliteTx<'a> { fn get_tree(&self, i: usize) -> TxOpResult<&'_ str> { - self.trees.get(i).map(String::as_ref).ok_or_else(|| { + self.trees.get(i).map(Arc::as_ref).ok_or_else(|| { TxOpError(Error( "invalid tree id (it might have been openned after the transaction started)".into(), )) @@ -408,18 +400,14 @@ impl<'a> ITx for SqliteTx<'a> { // ---- struct DbValueIterator<'a> { - db: MutexGuard<'a, SqliteDbInner>, + db: Connection, stmt: Option>, iter: Option>, _pin: PhantomPinned, } impl<'a> DbValueIterator<'a> { - fn make( - db: MutexGuard<'a, SqliteDbInner>, - sql: &str, - args: P, - ) -> Result> { + fn make(db: Connection, sql: &str, args: P) -> Result> { let res = DbValueIterator { db, stmt: None, @@ -431,7 +419,7 @@ impl<'a> DbValueIterator<'a> { unsafe { let db = NonNull::from(&boxed.db); - let stmt = db.as_ref().db.prepare(sql)?; + let stmt = db.as_ref().prepare(sql)?; let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed); Pin::get_unchecked_mut(mut_ref).stmt = Some(stmt); diff --git a/src/db/test.rs b/src/db/test.rs index cd99eafa..cad25f4d 100644 --- a/src/db/test.rs +++ b/src/db/test.rs @@ -106,6 +106,7 @@ fn test_sled_db() { fn test_sqlite_db() { use crate::sqlite_adapter::SqliteDb; - let db = SqliteDb::init(rusqlite::Connection::open_in_memory().unwrap()); + let manager = r2d2_sqlite::SqliteConnectionManager::memory(); + let db = SqliteDb::new(manager, false).unwrap(); test_suite(db); } -- cgit v1.2.3 From b55f52a9b75359b02938ac003a6ea853b36a4f3e Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 18 Mar 2024 17:58:34 +0100 Subject: [sqlite-r2d2] run integration test with all db engines --- src/garage/tests/common/garage.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/garage/tests/common/garage.rs b/src/garage/tests/common/garage.rs index d1f0867a..006337ee 100644 --- a/src/garage/tests/common/garage.rs +++ b/src/garage/tests/common/garage.rs @@ -42,6 +42,10 @@ impl Instance { .ok() .unwrap_or_else(|| env::temp_dir().join(format!("garage-integ-test-{}", port))); + let db_engine = env::var("GARAGE_TEST_INTEGRATION_DB_ENGINE") + .ok() + .unwrap_or_else(|| "lmdb".into()); + // Clean test runtime directory if path.exists() { fs::remove_dir_all(&path).expect("Could not clean test runtime directory"); @@ -52,7 +56,7 @@ impl Instance { r#" metadata_dir = "{path}/meta" data_dir = "{path}/data" -db_engine = "lmdb" +db_engine = "{db_engine}" replication_mode = "1" -- cgit v1.2.3