diff options
author | Alex <alex@adnab.me> | 2024-03-12 10:45:57 +0000 |
---|---|---|
committer | Alex <alex@adnab.me> | 2024-03-12 10:45:57 +0000 |
commit | 81191d2d92e58ff82ace0f4d82b275c157673ade (patch) | |
tree | 53396e8ddbb94c4cc2f7f037a66c12a3c125c22f /src | |
parent | 2128b5febd97eb6c95e3bdd380dcba39b860c0e3 (diff) | |
parent | 2795b53b8b3ebd162df6b0244b73889e72f67ce0 (diff) | |
download | garage-81191d2d92e58ff82ace0f4d82b275c157673ade.tar.gz garage-81191d2d92e58ff82ace0f4d82b275c157673ade.zip |
Merge pull request 'Remove Sled' (#767) from rm-sled into next-0.10
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/767
Diffstat (limited to 'src')
-rw-r--r-- | src/block/manager.rs | 7 | ||||
-rw-r--r-- | src/block/metrics.rs | 17 | ||||
-rw-r--r-- | src/block/resync.rs | 21 | ||||
-rw-r--r-- | src/db/Cargo.toml | 3 | ||||
-rw-r--r-- | src/db/counted_tree_hack.rs | 127 | ||||
-rw-r--r-- | src/db/lib.rs | 18 | ||||
-rw-r--r-- | src/db/lmdb_adapter.rs | 90 | ||||
-rw-r--r-- | src/db/open.rs | 27 | ||||
-rw-r--r-- | src/db/sled_adapter.rs | 274 | ||||
-rw-r--r-- | src/db/sqlite_adapter.rs | 184 | ||||
-rw-r--r-- | src/db/test.rs | 60 | ||||
-rw-r--r-- | src/garage/Cargo.toml | 5 | ||||
-rw-r--r-- | src/garage/admin/mod.rs | 51 | ||||
-rw-r--r-- | src/garage/cli/convert_db.rs | 2 | ||||
-rw-r--r-- | src/garage/cli/structs.rs | 4 | ||||
-rw-r--r-- | src/garage/main.rs | 6 | ||||
-rw-r--r-- | src/model/Cargo.toml | 3 | ||||
-rw-r--r-- | src/model/garage.rs | 5 | ||||
-rw-r--r-- | src/model/s3/lifecycle_worker.rs | 8 | ||||
-rw-r--r-- | src/table/data.rs | 6 | ||||
-rw-r--r-- | src/table/gc.rs | 24 | ||||
-rw-r--r-- | src/table/merkle.rs | 8 | ||||
-rw-r--r-- | src/table/metrics.rs | 21 | ||||
-rw-r--r-- | src/util/config.rs | 19 |
24 files changed, 328 insertions, 662 deletions
diff --git a/src/block/manager.rs b/src/block/manager.rs index c7e4df17..18fadf85 100644 --- a/src/block/manager.rs +++ b/src/block/manager.rs @@ -378,11 +378,6 @@ impl BlockManager { Ok(self.rc.rc.len()?) } - /// Get number of items in the refcount table - pub fn rc_fast_len(&self) -> Result<Option<usize>, Error> { - Ok(self.rc.rc.fast_len()?) - } - /// Send command to start/stop/manager scrub worker pub async fn send_scrub_command(&self, cmd: ScrubWorkerCommand) -> Result<(), Error> { let tx = self.tx_scrub_command.load(); @@ -398,7 +393,7 @@ impl BlockManager { /// List all resync errors pub fn list_resync_errors(&self) -> Result<Vec<BlockResyncErrorInfo>, Error> { - let mut blocks = Vec::with_capacity(self.resync.errors.len()); + let mut blocks = Vec::with_capacity(self.resync.errors.len()?); for ent in self.resync.errors.iter()? { let (hash, cnt) = ent?; let cnt = ErrorCounter::decode(&cnt); diff --git a/src/block/metrics.rs b/src/block/metrics.rs index 6659df32..8e10afdf 100644 --- a/src/block/metrics.rs +++ b/src/block/metrics.rs @@ -1,7 +1,6 @@ use opentelemetry::{global, metrics::*}; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; /// TableMetrics reference all counter used for metrics pub struct BlockManagerMetrics { @@ -29,8 +28,8 @@ impl BlockManagerMetrics { pub fn new( compression_level: Option<i32>, rc_tree: db::Tree, - resync_queue: CountedTree, - resync_errors: CountedTree, + resync_queue: db::Tree, + resync_errors: db::Tree, ) -> Self { let meter = global::meter("garage_model/block"); Self { @@ -45,15 +44,17 @@ impl BlockManagerMetrics { .init(), _rc_size: meter .u64_value_observer("block.rc_size", move |observer| { - if let Ok(Some(v)) = rc_tree.fast_len() { - observer.observe(v as u64, &[]) + if let Ok(value) = rc_tree.len() { + observer.observe(value as u64, &[]) } }) .with_description("Number of blocks known to the reference counter") .init(), _resync_queue_len: meter .u64_value_observer("block.resync_queue_length", move |observer| { - observer.observe(resync_queue.len() as u64, &[]) + if let Ok(value) = resync_queue.len() { + observer.observe(value as u64, &[]); + } }) .with_description( "Number of block hashes queued for local check and possible resync", @@ -61,7 +62,9 @@ impl BlockManagerMetrics { .init(), _resync_errored_blocks: meter .u64_value_observer("block.resync_errored_blocks", move |observer| { - observer.observe(resync_errors.len() as u64, &[]) + if let Ok(value) = resync_errors.len() { + observer.observe(value as u64, &[]); + } }) .with_description("Number of block hashes whose last resync resulted in an error") .init(), diff --git a/src/block/resync.rs b/src/block/resync.rs index 15f210e4..48c2cef1 100644 --- a/src/block/resync.rs +++ b/src/block/resync.rs @@ -15,7 +15,6 @@ use opentelemetry::{ }; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; use garage_util::background::*; use garage_util::data::*; @@ -47,9 +46,9 @@ pub(crate) const MAX_RESYNC_WORKERS: usize = 8; const INITIAL_RESYNC_TRANQUILITY: u32 = 2; pub struct BlockResyncManager { - pub(crate) queue: CountedTree, + pub(crate) queue: db::Tree, pub(crate) notify: Arc<Notify>, - pub(crate) errors: CountedTree, + pub(crate) errors: db::Tree, busy_set: BusySet, @@ -90,12 +89,10 @@ impl BlockResyncManager { let queue = db .open_tree("block_local_resync_queue") .expect("Unable to open block_local_resync_queue tree"); - let queue = CountedTree::new(queue).expect("Could not count block_local_resync_queue"); let errors = db .open_tree("block_local_resync_errors") .expect("Unable to open block_local_resync_errors tree"); - let errors = CountedTree::new(errors).expect("Could not count block_local_resync_errors"); let persister = PersisterShared::new(&system.metadata_dir, "resync_cfg"); @@ -110,16 +107,12 @@ impl BlockResyncManager { /// Get lenght of resync queue pub fn queue_len(&self) -> Result<usize, Error> { - // This currently can't return an error because the CountedTree hack - // doesn't error on .len(), but this will change when we remove the hack - // (hopefully someday!) - Ok(self.queue.len()) + Ok(self.queue.len()?) } /// Get number of blocks that have an error pub fn errors_len(&self) -> Result<usize, Error> { - // (see queue_len comment) - Ok(self.errors.len()) + Ok(self.errors.len()?) } /// Clear the error counter for a block and put it in queue immediately @@ -180,7 +173,7 @@ impl BlockResyncManager { // deleted once the garbage collection delay has passed. // // Here are some explanations on how the resync queue works. - // There are two Sled trees that are used to have information + // There are two db trees that are used to have information // about the status of blocks that need to be resynchronized: // // - resync.queue: a tree that is ordered first by a timestamp @@ -541,9 +534,9 @@ impl Worker for ResyncWorker { Ok(WorkerState::Idle) } Err(e) => { - // The errors that we have here are only Sled errors + // The errors that we have here are only db errors // We don't really know how to handle them so just ¯\_(ツ)_/¯ - // (there is kind of an assumption that Sled won't error on us, + // (there is kind of an assumption that the db won't error on us, // if it does there is not much we can do -- TODO should we just panic?) // Here we just give the error to the worker manager, // it will print it to the logs and increment a counter diff --git a/src/db/Cargo.toml b/src/db/Cargo.toml index fddc5cca..a8f6d586 100644 --- a/src/db/Cargo.toml +++ b/src/db/Cargo.toml @@ -18,13 +18,12 @@ tracing.workspace = true heed = { workspace = true, optional = true } rusqlite = { workspace = true, optional = true } -sled = { workspace = true, optional = true } [dev-dependencies] mktemp.workspace = true [features] -default = [ "sled", "lmdb", "sqlite" ] +default = [ "lmdb", "sqlite" ] bundled-libs = [ "rusqlite?/bundled" ] lmdb = [ "heed" ] sqlite = [ "rusqlite" ] diff --git a/src/db/counted_tree_hack.rs b/src/db/counted_tree_hack.rs deleted file mode 100644 index a4ce12e0..00000000 --- a/src/db/counted_tree_hack.rs +++ /dev/null @@ -1,127 +0,0 @@ -//! This hack allows a db tree to keep in RAM a counter of the number of entries -//! it contains, which is used to call .len() on it. This is usefull only for -//! the sled backend where .len() otherwise would have to traverse the whole -//! tree to count items. For sqlite and lmdb, this is mostly useless (but -//! hopefully not harmfull!). Note that a CountedTree cannot be part of a -//! transaction. - -use std::sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, -}; - -use crate::{Result, Tree, TxError, Value, ValueIter}; - -#[derive(Clone)] -pub struct CountedTree(Arc<CountedTreeInternal>); - -struct CountedTreeInternal { - tree: Tree, - len: AtomicUsize, -} - -impl CountedTree { - pub fn new(tree: Tree) -> Result<Self> { - let len = tree.len()?; - Ok(Self(Arc::new(CountedTreeInternal { - tree, - len: AtomicUsize::new(len), - }))) - } - - pub fn len(&self) -> usize { - self.0.len.load(Ordering::SeqCst) - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> { - self.0.tree.get(key) - } - - pub fn first(&self) -> Result<Option<(Value, Value)>> { - self.0.tree.first() - } - - pub fn iter(&self) -> Result<ValueIter<'_>> { - self.0.tree.iter() - } - - // ---- writing functions ---- - - pub fn insert<K, V>(&self, key: K, value: V) -> Result<Option<Value>> - where - K: AsRef<[u8]>, - V: AsRef<[u8]>, - { - let old_val = self.0.tree.insert(key, value)?; - if old_val.is_none() { - self.0.len.fetch_add(1, Ordering::SeqCst); - } - Ok(old_val) - } - - pub fn remove<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> { - let old_val = self.0.tree.remove(key)?; - if old_val.is_some() { - self.0.len.fetch_sub(1, Ordering::SeqCst); - } - Ok(old_val) - } - - pub fn compare_and_swap<K, OV, NV>( - &self, - key: K, - expected_old: Option<OV>, - new: Option<NV>, - ) -> Result<bool> - where - K: AsRef<[u8]>, - OV: AsRef<[u8]>, - NV: AsRef<[u8]>, - { - let old_some = expected_old.is_some(); - let new_some = new.is_some(); - - let tx_res = self.0.tree.db().transaction(|tx| { - let old_val = tx.get(&self.0.tree, &key)?; - let is_same = match (&old_val, &expected_old) { - (None, None) => true, - (Some(x), Some(y)) if x == y.as_ref() => true, - _ => false, - }; - if is_same { - match &new { - Some(v) => { - tx.insert(&self.0.tree, &key, v)?; - } - None => { - tx.remove(&self.0.tree, &key)?; - } - } - Ok(()) - } else { - Err(TxError::Abort(())) - } - }); - - match tx_res { - Ok(()) => { - match (old_some, new_some) { - (false, true) => { - self.0.len.fetch_add(1, Ordering::SeqCst); - } - (true, false) => { - self.0.len.fetch_sub(1, Ordering::SeqCst); - } - _ => (), - } - Ok(true) - } - Err(TxError::Abort(())) => Ok(false), - Err(TxError::Db(e)) => Err(e), - } - } -} diff --git a/src/db/lib.rs b/src/db/lib.rs index 0fb457ce..ff511b5f 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -3,13 +3,9 @@ extern crate tracing; #[cfg(feature = "lmdb")] pub mod lmdb_adapter; -#[cfg(feature = "sled")] -pub mod sled_adapter; #[cfg(feature = "sqlite")] pub mod sqlite_adapter; -pub mod counted_tree_hack; - pub mod open; #[cfg(test)] @@ -55,6 +51,7 @@ pub type Result<T> = std::result::Result<T, Error>; pub struct TxOpError(pub(crate) Error); pub type TxOpResult<T> = std::result::Result<T, TxOpError>; +#[derive(Debug)] pub enum TxError<E> { Abort(E), Db(Error), @@ -189,10 +186,6 @@ impl Tree { pub fn len(&self) -> Result<usize> { self.0.len(self.1) } - #[inline] - pub fn fast_len(&self) -> Result<Option<usize>> { - self.0.fast_len(self.1) - } #[inline] pub fn first(&self) -> Result<Option<(Value, Value)>> { @@ -282,6 +275,11 @@ impl<'a> Transaction<'a> { pub fn remove<T: AsRef<[u8]>>(&mut self, tree: &Tree, key: T) -> TxOpResult<Option<Value>> { self.tx.remove(tree.1, key.as_ref()) } + /// Clears all values in a tree + #[inline] + pub fn clear(&mut self, tree: &Tree) -> TxOpResult<()> { + self.tx.clear(tree.1) + } #[inline] pub fn iter(&self, tree: &Tree) -> TxOpResult<TxValueIter<'_>> { @@ -328,9 +326,6 @@ pub(crate) trait IDb: Send + Sync { fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>; fn len(&self, tree: usize) -> Result<usize>; - fn fast_len(&self, _tree: usize) -> Result<Option<usize>> { - Ok(None) - } fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>>; fn remove(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>; @@ -361,6 +356,7 @@ pub(crate) trait ITx { fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>>; fn remove(&mut self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>>; + fn clear(&mut self, tree: usize) -> TxOpResult<()>; fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>>; fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>>; diff --git a/src/db/lmdb_adapter.rs b/src/db/lmdb_adapter.rs index 59fa132d..5ce7d3e3 100644 --- a/src/db/lmdb_adapter.rs +++ b/src/db/lmdb_adapter.rs @@ -3,6 +3,7 @@ use core::ptr::NonNull; use std::collections::HashMap; use std::convert::TryInto; +use std::pin::Pin; use std::sync::{Arc, RwLock}; use heed::types::ByteSlice; @@ -121,10 +122,6 @@ impl IDb for LmdbDb { Ok(tree.len(&tx)?.try_into().unwrap()) } - fn fast_len(&self, tree: usize) -> Result<Option<usize>> { - Ok(Some(self.len(tree)?)) - } - fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> { let tree = self.get_tree(tree)?; let mut tx = self.db.write_txn()?; @@ -242,8 +239,9 @@ impl<'a> ITx for LmdbTx<'a> { None => Ok(None), } } - fn len(&self, _tree: usize) -> TxOpResult<usize> { - unimplemented!(".len() in transaction not supported with LMDB backend") + fn len(&self, tree: usize) -> TxOpResult<usize> { + let tree = self.get_tree(tree)?; + Ok(tree.len(&self.tx)? as usize) } fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>> { @@ -258,33 +256,48 @@ impl<'a> ITx for LmdbTx<'a> { tree.delete(&mut self.tx, key)?; Ok(old_val) } + fn clear(&mut self, tree: usize) -> TxOpResult<()> { + let tree = *self.get_tree(tree)?; + tree.clear(&mut self.tx)?; + Ok(()) + } - fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> { - unimplemented!("Iterators in transactions not supported with LMDB backend"); + fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> { + let tree = *self.get_tree(tree)?; + Ok(Box::new(tree.iter(&self.tx)?.map(tx_iter_item))) } - fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> { - unimplemented!("Iterators in transactions not supported with LMDB backend"); + fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> { + let tree = *self.get_tree(tree)?; + Ok(Box::new(tree.rev_iter(&self.tx)?.map(tx_iter_item))) } fn range<'r>( &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, ) -> TxOpResult<TxValueIter<'_>> { - unimplemented!("Iterators in transactions not supported with LMDB backend"); + let tree = *self.get_tree(tree)?; + Ok(Box::new( + tree.range(&self.tx, &(low, high))?.map(tx_iter_item), + )) } fn range_rev<'r>( &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, ) -> TxOpResult<TxValueIter<'_>> { - unimplemented!("Iterators in transactions not supported with LMDB backend"); + let tree = *self.get_tree(tree)?; + Ok(Box::new( + tree.rev_range(&self.tx, &(low, high))?.map(tx_iter_item), + )) } } -// ---- +// ---- iterators outside transactions ---- +// complicated, they must hold the transaction object +// therefore a bit of unsafe code (it is a self-referential struct) type IteratorItem<'a> = heed::Result<( <ByteSlice as BytesDecode<'a>>::DItem, @@ -307,12 +320,20 @@ where where F: FnOnce(&'a RoTxn<'a>) -> Result<I>, { - let mut res = TxAndIterator { tx, iter: None }; + let res = TxAndIterator { tx, iter: None }; + let mut boxed = Box::pin(res); + + // This unsafe allows us to bypass lifetime checks + let tx = unsafe { NonNull::from(&boxed.tx).as_ref() }; + let iter = iterfun(tx)?; - let tx = unsafe { NonNull::from(&res.tx).as_ref() }; - res.iter = Some(iterfun(tx)?); + let mut_ref = Pin::as_mut(&mut boxed); + // This unsafe allows us to write in a field of the pinned struct + unsafe { + Pin::get_unchecked_mut(mut_ref).iter = Some(iter); + } - Ok(Box::new(res)) + Ok(Box::new(TxAndIteratorPin(boxed))) } } @@ -321,18 +342,26 @@ where I: Iterator<Item = IteratorItem<'a>> + 'a, { fn drop(&mut self) { + // ensure the iterator is dropped before the RoTxn it references drop(self.iter.take()); } } -impl<'a, I> Iterator for TxAndIterator<'a, I> +struct TxAndIteratorPin<'a, I>(Pin<Box<TxAndIterator<'a, I>>>) +where + I: Iterator<Item = IteratorItem<'a>> + 'a; + +impl<'a, I> Iterator for TxAndIteratorPin<'a, I> where I: Iterator<Item = IteratorItem<'a>> + 'a, { type Item = Result<(Value, Value)>; fn next(&mut self) -> Option<Self::Item> { - match self.iter.as_mut().unwrap().next() { + let mut_ref = Pin::as_mut(&mut self.0); + // This unsafe allows us to mutably access the iterator field + let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() }; + match next { None => None, Some(Err(e)) => Some(Err(e.into())), Some(Ok((k, v))) => Some(Ok((k.to_vec(), v.to_vec()))), @@ -340,7 +369,16 @@ where } } -// ---- +// ---- iterators within transactions ---- + +fn tx_iter_item<'a>( + item: std::result::Result<(&'a [u8], &'a [u8]), heed::Error>, +) -> TxOpResult<(Vec<u8>, Vec<u8>)> { + item.map(|(k, v)| (k.to_vec(), v.to_vec())) + .map_err(|e| TxOpError(Error::from(e))) +} + +// ---- utility ---- #[cfg(target_pointer_width = "64")] pub fn recommended_map_size() -> usize { diff --git a/src/db/open.rs b/src/db/open.rs index ae135c4e..03476a42 100644 --- a/src/db/open.rs +++ b/src/db/open.rs @@ -11,7 +11,6 @@ use crate::{Db, Error, Result}; pub enum Engine { Lmdb, Sqlite, - Sled, } impl Engine { @@ -20,7 +19,6 @@ impl Engine { match self { Self::Lmdb => "lmdb", Self::Sqlite => "sqlite", - Self::Sled => "sled", } } } @@ -38,10 +36,10 @@ impl std::str::FromStr for Engine { match text { "lmdb" | "heed" => Ok(Self::Lmdb), "sqlite" | "sqlite3" | "rusqlite" => Ok(Self::Sqlite), - "sled" => Ok(Self::Sled), + "sled" => Err(Error("Sled is no longer supported as a database engine. Converting your old metadata db can be done using an older Garage binary (e.g. v0.9.3).".into())), kind => Err(Error( format!( - "Invalid DB engine: {} (options are: lmdb, sled, sqlite)", + "Invalid DB engine: {} (options are: lmdb, sqlite)", kind ) .into(), @@ -53,8 +51,6 @@ impl std::str::FromStr for Engine { pub struct OpenOpt { pub fsync: bool, pub lmdb_map_size: Option<usize>, - pub sled_cache_capacity: usize, - pub sled_flush_every_ms: u64, } impl Default for OpenOpt { @@ -62,31 +58,12 @@ impl Default for OpenOpt { Self { fsync: false, lmdb_map_size: None, - sled_cache_capacity: 1024 * 1024 * 1024, - sled_flush_every_ms: 2000, } } } pub fn open_db(path: &PathBuf, engine: Engine, opt: &OpenOpt) -> Result<Db> { match engine { - // ---- Sled DB ---- - #[cfg(feature = "sled")] - Engine::Sled => { - if opt.fsync { - return Err(Error( - "`metadata_fsync = true` is not supported with the Sled database engine".into(), - )); - } - info!("Opening Sled database at: {}", path.display()); - let db = crate::sled_adapter::sled::Config::default() - .path(&path) - .cache_capacity(opt.sled_cache_capacity as u64) - .flush_every_ms(Some(opt.sled_flush_every_ms)) - .open()?; - Ok(crate::sled_adapter::SledDb::init(db)) - } - // ---- Sqlite DB ---- #[cfg(feature = "sqlite")] Engine::Sqlite => { diff --git a/src/db/sled_adapter.rs b/src/db/sled_adapter.rs deleted file mode 100644 index 84f2001b..00000000 --- a/src/db/sled_adapter.rs +++ /dev/null @@ -1,274 +0,0 @@ -use core::ops::Bound; - -use std::cell::Cell; -use std::collections::HashMap; -use std::sync::{Arc, RwLock}; - -use sled::transaction::{ - ConflictableTransactionError, TransactionError, Transactional, TransactionalTree, - UnabortableTransactionError, -}; - -use crate::{ - Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult, - TxResult, TxValueIter, Value, ValueIter, -}; - -pub use sled; - -// -- err - -impl From<sled::Error> for Error { - fn from(e: sled::Error) -> Error { - Error(format!("Sled: {}", e).into()) - } -} - -impl From<sled::Error> for TxOpError { - fn from(e: sled::Error) -> TxOpError { - TxOpError(e.into()) - } -} - -// -- db - -pub struct SledDb { - db: sled::Db, - trees: RwLock<(Vec<sled::Tree>, HashMap<String, usize>)>, -} - -impl SledDb { - #[deprecated( - since = "0.9.0", - note = "The Sled database is now deprecated and will be removed in Garage v1.0. Please migrate to LMDB or Sqlite as soon as possible." - )] - pub fn init(db: sled::Db) -> Db { - tracing::warn!("-------------------- IMPORTANT WARNING !!! ----------------------"); - tracing::warn!("The Sled database is now deprecated and will be removed in Garage v1.0."); - tracing::warn!("Please migrate to LMDB or Sqlite as soon as possible."); - tracing::warn!("-----------------------------------------------------------------------"); - let s = Self { - db, - trees: RwLock::new((Vec::new(), HashMap::new())), - }; - Db(Arc::new(s)) - } - - fn get_tree(&self, i: usize) -> Result<sled::Tree> { - self.trees - .read() - .unwrap() - .0 - .get(i) - .cloned() - .ok_or_else(|| Error("invalid tree id".into())) - } -} - -impl IDb for SledDb { - fn engine(&self) -> String { - "Sled".into() - } - - fn open_tree(&self, name: &str) -> Result<usize> { - let mut trees = self.trees.write().unwrap(); - if let Some(i) = trees.1.get(name) { - Ok(*i) - } else { - let tree = self.db.open_tree(name)?; - let i = trees.0.len(); - trees.0.push(tree); - trees.1.insert(name.to_string(), i); - Ok(i) - } - } - - fn list_trees(&self) -> Result<Vec<String>> { - let mut trees = vec![]; - for name in self.db.tree_names() { - let name = std::str::from_utf8(&name) - .map_err(|e| Error(format!("{}", e).into()))? - .to_string(); - if name != "__sled__default" { - trees.push(name); - } - } - Ok(trees) - } - - // ---- - - fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>> { - let tree = self.get_tree(tree)?; - let val = tree.get(key)?; - Ok(val.map(|x| x.to_vec())) - } - - fn len(&self, tree: usize) -> Result<usize> { - let tree = self.get_tree(tree)?; - Ok(tree.len()) - } - - fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> { - let tree = self.get_tree(tree)?; - let old_val = tree.insert(key, value)?; - Ok(old_val.map(|x| x.to_vec())) - } - - fn remove(&self, tree: usize, key: &[u8]) -> Result<Option<Value>> { - let tree = self.get_tree(tree)?; - let old_val = tree.remove(key)?; - Ok(old_val.map(|x| x.to_vec())) - } - - fn clear(&self, tree: usize) -> Result<()> { - let tree = self.get_tree(tree)?; - tree.clear()?; - Ok(()) - } - - fn iter(&self, tree: usize) -> Result<ValueIter<'_>> { - let tree = self.get_tree(tree)?; - Ok(Box::new(tree.iter().map(|v| { - v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into) - }))) - } - - fn iter_rev(&self, tree: usize) -> Result<ValueIter<'_>> { - let tree = self.get_tree(tree)?; - Ok(Box::new(tree.iter().rev().map(|v| { - v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into) - }))) - } - - fn range<'r>( - &self, - tree: usize, - low: Bound<&'r [u8]>, - high: Bound<&'r [u8]>, - ) -> Result<ValueIter<'_>> { - let tree = self.get_tree(tree)?; - Ok(Box::new(tree.range::<&'r [u8], _>((low, high)).map(|v| { - v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into) - }))) - } - fn range_rev<'r>( - &self, - tree: usize, - low: Bound<&'r [u8]>, - high: Bound<&'r [u8]>, - ) -> Result<ValueIter<'_>> { - let tree = self.get_tree(tree)?; - Ok(Box::new(tree.range::<&'r [u8], _>((low, high)).rev().map( - |v| v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into), - ))) - } - - // ---- - - fn transaction(&self, f: &dyn ITxFn) -> TxResult<OnCommit, ()> { - let trees = self.trees.read().unwrap(); - let res = trees.0.transaction(|txtrees| { - let mut tx = SledTx { - trees: txtrees, - err: Cell::new(None), - }; - match f.try_on(&mut tx) { - TxFnResult::Ok(on_commit) => { - assert!(tx.err.into_inner().is_none()); - Ok(on_commit) - } - TxFnResult::Abort => { - assert!(tx.err.into_inner().is_none()); - Err(ConflictableTransactionError::Abort(())) - } - TxFnResult::DbErr => { - let e = tx.err.into_inner().expect("No DB error"); - Err(e.into()) - } - } - }); - match res { - Ok(on_commit) => Ok(on_commit), - Err(TransactionError::Abort(())) => Err(TxError::Abort(())), - Err(TransactionError::Storage(s)) => Err(TxError::Db(s.into())), - } - } -} - -// ---- - -struct SledTx<'a> { - trees: &'a [TransactionalTree], - err: Cell<Option<UnabortableTransactionError>>, -} - -impl<'a> SledTx<'a> { - fn get_tree(&self, i: usize) -> TxOpResult<&TransactionalTree> { - self.trees.get(i).ok_or_else(|| { - TxOpError(Error( - "invalid tree id (it might have been openned after the transaction started)".into(), - )) - }) - } - - fn save_error<R>( - &self, - v: std::result::Result<R, UnabortableTransactionError>, - ) -> TxOpResult<R> { - match v { - Ok(x) => Ok(x), - Err(e) => { - let txt = format!("{}", e); - self.err.set(Some(e)); - Err(TxOpError(Error(txt.into()))) - } - } - } -} - -impl<'a> ITx for SledTx<'a> { - fn get(&self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>> { - let tree = self.get_tree(tree)?; - let tmp = self.save_error(tree.get(key))?; - Ok(tmp.map(|x| x.to_vec())) - } - fn len(&self, _tree: usize) -> TxOpResult<usize> { - unimplemented!(".len() in transaction not supported with Sled backend") - } - - fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>> { - let tree = self.get_tree(tree)?; - let old_val = self.save_error(tree.insert(key, value))?; - Ok(old_val.map(|x| x.to_vec())) - } - fn remove(&mut self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>> { - let tree = self.get_tree(tree)?; - let old_val = self.save_error(tree.remove(key))?; - Ok(old_val.map(|x| x.to_vec())) - } - - fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> { - unimplemented!("Iterators in transactions not supported with Sled backend"); - } - fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> { - unimplemented!("Iterators in transactions not supported with Sled backend"); - } - - fn range<'r>( - &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, - ) -> TxOpResult<TxValueIter<'_>> { - unimplemented!("Iterators in transactions not supported with Sled backend"); - } - fn range_rev<'r>( - &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, - ) -> TxOpResult<TxValueIter<'_>> { - unimplemented!("Iterators in transactions not supported with Sled backend"); - } -} diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs index 9f967c66..6c556c97 100644 --- a/src/db/sqlite_adapter.rs +++ b/src/db/sqlite_adapter.rs @@ -144,10 +144,6 @@ impl IDb for SqliteDb { } } - fn fast_len(&self, tree: usize) -> Result<Option<usize>> { - Ok(Some(self.len(tree)?)) - } - fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> { trace!("insert {}: lock db", tree); let this = self.0.lock().unwrap(); @@ -367,33 +363,64 @@ impl<'a> ITx for SqliteTx<'a> { Ok(old_val) } + fn clear(&mut self, tree: usize) -> TxOpResult<()> { + let tree = self.get_tree(tree)?; + self.tx.execute(&format!("DELETE FROM {}", tree), [])?; + Ok(()) + } - fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> { - unimplemented!(); + fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> { + let tree = self.get_tree(tree)?; + let sql = format!("SELECT k, v FROM {} ORDER BY k ASC", tree); + TxValueIterator::make(self, &sql, []) } - fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> { - unimplemented!(); + fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> { + let tree = self.get_tree(tree)?; + let sql = format!("SELECT k, v FROM {} ORDER BY k DESC", tree); + TxValueIterator::make(self, &sql, []) } fn range<'r>( &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, ) -> TxOpResult<TxValueIter<'_>> { - unimplemented!(); + let tree = self.get_tree(tree)?; + + let (bounds_sql, params) = bounds_sql(low, high); + let sql = format!("SELECT k, v FROM {} {} ORDER BY k ASC", tree, bounds_sql); + + let params = params + .iter() + .map(|x| x as &dyn rusqlite::ToSql) + .collect::<Vec<_>>(); + + TxValueIterator::make::<&[&dyn rusqlite::ToSql]>(self, &sql, params.as_ref()) } fn range_rev<'r>( &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, ) -> TxOpResult<TxValueIter<'_>> { - unimplemented!(); + let tree = self.get_tree(tree)?; + + let (bounds_sql, params) = bounds_sql(low, high); + let sql = format!("SELECT k, v FROM {} {} ORDER BY k DESC", tree, bounds_sql); + + let params = params + .iter() + .map(|x| x as &dyn rusqlite::ToSql) + .collect::<Vec<_>>(); + + TxValueIterator::make::<&[&dyn rusqlite::ToSql]>(self, &sql, params.as_ref()) } } -// ---- +// ---- iterators outside transactions ---- +// complicated, they must hold the Statement and Row objects +// therefore quite some unsafe code (it is a self-referential struct) struct DbValueIterator<'a> { db: MutexGuard<'a, SqliteDbInner>, @@ -417,17 +444,23 @@ impl<'a> DbValueIterator<'a> { let mut boxed = Box::pin(res); trace!("make iterator with sql: {}", sql); - unsafe { - let db = NonNull::from(&boxed.db); - let stmt = db.as_ref().db.prepare(sql)?; + // This unsafe allows us to bypass lifetime checks + let db = unsafe { NonNull::from(&boxed.db).as_ref() }; + let stmt = db.db.prepare(sql)?; - let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed); + let mut_ref = Pin::as_mut(&mut boxed); + // This unsafe allows us to write in a field of the pinned struct + unsafe { Pin::get_unchecked_mut(mut_ref).stmt = Some(stmt); + } - let mut stmt = NonNull::from(&boxed.stmt); - let iter = stmt.as_mut().as_mut().unwrap().query(args)?; + // This unsafe allows us to bypass lifetime checks + let stmt = unsafe { NonNull::from(&boxed.stmt).as_mut() }; + let iter = stmt.as_mut().unwrap().query(args)?; - let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed); + let mut_ref = Pin::as_mut(&mut boxed); + // This unsafe allows us to write in a field of the pinned struct + unsafe { Pin::get_unchecked_mut(mut_ref).iter = Some(iter); } @@ -449,28 +482,73 @@ impl<'a> Iterator for DbValueIteratorPin<'a> { type Item = Result<(Value, Value)>; fn next(&mut self) -> Option<Self::Item> { - let next = unsafe { - let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut self.0); - Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() - }; - let row = match next { - Err(e) => return Some(Err(e.into())), - Ok(None) => return None, - Ok(Some(r)) => r, - }; - let k = match row.get::<_, Vec<u8>>(0) { - Err(e) => return Some(Err(e.into())), - Ok(x) => x, - }; - let v = match row.get::<_, Vec<u8>>(1) { - Err(e) => return Some(Err(e.into())), - Ok(y) => y, + let mut_ref = Pin::as_mut(&mut self.0); + // This unsafe allows us to mutably access the iterator field + let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() }; + iter_next_row(next) + } +} + +// ---- iterators within transactions ---- +// it's the same except we don't hold a mutex guard, +// only a Statement and a Rows object + +struct TxValueIterator<'a> { + stmt: Statement<'a>, + iter: Option<Rows<'a>>, + _pin: PhantomPinned, +} + +impl<'a> TxValueIterator<'a> { + fn make<P: rusqlite::Params>( + tx: &'a SqliteTx<'a>, + sql: &str, + args: P, + ) -> TxOpResult<TxValueIter<'a>> { + let stmt = tx.tx.prepare(sql)?; + let res = TxValueIterator { + stmt, + iter: None, + _pin: PhantomPinned, }; - Some(Ok((k, v))) + let mut boxed = Box::pin(res); + trace!("make iterator with sql: {}", sql); + + // This unsafe allows us to bypass lifetime checks + let stmt = unsafe { NonNull::from(&boxed.stmt).as_mut() }; + let iter = stmt.query(args)?; + + let mut_ref = Pin::as_mut(&mut boxed); + // This unsafe allows us to write in a field of the pinned struct + unsafe { + Pin::get_unchecked_mut(mut_ref).iter = Some(iter); + } + + Ok(Box::new(TxValueIteratorPin(boxed))) } } -// ---- +impl<'a> Drop for TxValueIterator<'a> { + fn drop(&mut self) { + trace!("drop iter"); + drop(self.iter.take()); + } +} + +struct TxValueIteratorPin<'a>(Pin<Box<TxValueIterator<'a>>>); + +impl<'a> Iterator for TxValueIteratorPin<'a> { + type Item = TxOpResult<(Value, Value)>; + + fn next(&mut self) -> Option<Self::Item> { + let mut_ref = Pin::as_mut(&mut self.0); + // This unsafe allows us to mutably access the iterator field + let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() }; + iter_next_row(next) + } +} + +// ---- utility ---- fn bounds_sql<'r>(low: Bound<&'r [u8]>, high: Bound<&'r [u8]>) -> (String, Vec<Vec<u8>>) { let mut sql = String::new(); @@ -510,3 +588,25 @@ fn bounds_sql<'r>(low: Bound<&'r [u8]>, high: Bound<&'r [u8]>) -> (String, Vec<V (sql, params) } + +fn iter_next_row<E>( + next_row: rusqlite::Result<Option<&rusqlite::Row>>, +) -> Option<std::result::Result<(Value, Value), E>> +where + E: From<rusqlite::Error>, +{ + let row = match next_row { + Err(e) => return Some(Err(e.into())), + Ok(None) => return None, + Ok(Some(r)) => r, + }; + let k = match row.get::<_, Vec<u8>>(0) { + Err(e) => return Some(Err(e.into())), + Ok(x) => x, + }; + let v = match row.get::<_, Vec<u8>>(1) { + Err(e) => return Some(Err(e.into())), + Ok(y) => y, + }; + Some(Ok((k, v))) +} diff --git a/src/db/test.rs b/src/db/test.rs index cd99eafa..3add89fb 100644 --- a/src/db/test.rs +++ b/src/db/test.rs @@ -10,8 +10,13 @@ fn test_suite(db: Db) { let vb: &[u8] = &b"plip"[..]; let vc: &[u8] = &b"plup"[..]; + // ---- test simple insert/delete ---- + assert!(tree.insert(ka, va).unwrap().is_none()); assert_eq!(tree.get(ka).unwrap().unwrap(), va); + assert_eq!(tree.len().unwrap(), 1); + + // ---- test transaction logic ---- let res = db.transaction::<_, (), _>(|tx| { assert_eq!(tx.get(&tree, ka).unwrap().unwrap(), va); @@ -37,6 +42,8 @@ fn test_suite(db: Db) { assert!(matches!(res, Err(TxError::Abort(42)))); assert_eq!(tree.get(ka).unwrap().unwrap(), vb); + // ---- test iteration outside of transactions ---- + let mut iter = tree.iter().unwrap(); let next = iter.next().unwrap().unwrap(); assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb)); @@ -73,6 +80,48 @@ fn test_suite(db: Db) { assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb)); assert!(iter.next().is_none()); drop(iter); + + // ---- test iteration within transactions ---- + + db.transaction::<_, (), _>(|tx| { + let mut iter = tx.iter(&tree).unwrap(); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb)); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc)); + assert!(iter.next().is_none()); + Ok(()) + }) + .unwrap(); + + db.transaction::<_, (), _>(|tx| { + let mut iter = tx.range(&tree, kint..).unwrap(); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc)); + assert!(iter.next().is_none()); + Ok(()) + }) + .unwrap(); + + db.transaction::<_, (), _>(|tx| { + let mut iter = tx.range_rev(&tree, ..kint).unwrap(); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb)); + assert!(iter.next().is_none()); + Ok(()) + }) + .unwrap(); + + db.transaction::<_, (), _>(|tx| { + let mut iter = tx.iter_rev(&tree).unwrap(); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc)); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb)); + assert!(iter.next().is_none()); + Ok(()) + }) + .unwrap(); } #[test] @@ -91,17 +140,6 @@ fn test_lmdb_db() { } #[test] -#[cfg(feature = "sled")] -fn test_sled_db() { - use crate::sled_adapter::SledDb; - - let path = mktemp::Temp::new_dir().unwrap(); - let db = SledDb::init(sled::open(path.to_path_buf()).unwrap()); - test_suite(db); - drop(path); -} - -#[test] #[cfg(feature = "sqlite")] fn test_sqlite_db() { use crate::sqlite_adapter::SqliteDb; diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 00ecb35e..53449a1c 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -80,12 +80,11 @@ k2v-client.workspace = true [features] -default = [ "bundled-libs", "metrics", "sled", "lmdb", "sqlite", "k2v" ] +default = [ "bundled-libs", "metrics", "lmdb", "sqlite", "k2v" ] k2v = [ "garage_util/k2v", "garage_api/k2v" ] -# Database engines, Sled is still our default even though we don't like it -sled = [ "garage_model/sled" ] +# Database engines lmdb = [ "garage_model/lmdb" ] sqlite = [ "garage_model/sqlite" ] diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index 073693ed..300e1211 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -197,11 +197,11 @@ impl AdminRpcHandler { // Gather table statistics let mut table = vec![" Table\tItems\tMklItems\tMklTodo\tGcTodo".into()]; - table.push(self.gather_table_stats(&self.garage.bucket_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.key_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.object_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.version_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.block_ref_table, opt.detailed)?); + table.push(self.gather_table_stats(&self.garage.bucket_table)?); + table.push(self.gather_table_stats(&self.garage.key_table)?); + table.push(self.gather_table_stats(&self.garage.object_table)?); + table.push(self.gather_table_stats(&self.garage.version_table)?); + table.push(self.gather_table_stats(&self.garage.block_ref_table)?); write!( &mut ret, "\nTable stats:\n{}", @@ -211,15 +211,7 @@ impl AdminRpcHandler { // Gather block manager statistics writeln!(&mut ret, "\nBlock manager stats:").unwrap(); - let rc_len = if opt.detailed { - self.garage.block_manager.rc_len()?.to_string() - } else { - self.garage - .block_manager - .rc_fast_len()? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()) - }; + let rc_len = self.garage.block_manager.rc_len()?.to_string(); writeln!( &mut ret, @@ -240,10 +232,6 @@ impl AdminRpcHandler { ) .unwrap(); - if !opt.detailed { - writeln!(&mut ret, "\nIf values are missing above (marked as NC), consider adding the --detailed flag (this will be slow).").unwrap(); - } - if !opt.skip_global { write!(&mut ret, "\n{}", self.gather_cluster_stats()).unwrap(); } @@ -345,34 +333,13 @@ impl AdminRpcHandler { ret } - fn gather_table_stats<F, R>( - &self, - t: &Arc<Table<F, R>>, - detailed: bool, - ) -> Result<String, Error> + fn gather_table_stats<F, R>(&self, t: &Arc<Table<F, R>>) -> Result<String, Error> where F: TableSchema + 'static, R: TableReplication + 'static, { - let (data_len, mkl_len) = if detailed { - ( - t.data.store.len().map_err(GarageError::from)?.to_string(), - t.merkle_updater.merkle_tree_len()?.to_string(), - ) - } else { - ( - t.data - .store - .fast_len() - .map_err(GarageError::from)? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()), - t.merkle_updater - .merkle_tree_fast_len()? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()), - ) - }; + let data_len = t.data.store.len().map_err(GarageError::from)?.to_string(); + let mkl_len = t.merkle_updater.merkle_tree_len()?.to_string(); Ok(format!( " {}\t{}\t{}\t{}\t{}", diff --git a/src/garage/cli/convert_db.rs b/src/garage/cli/convert_db.rs index 2aadb1d6..5346d55a 100644 --- a/src/garage/cli/convert_db.rs +++ b/src/garage/cli/convert_db.rs @@ -11,7 +11,7 @@ pub struct ConvertDbOpt { /// https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0) #[structopt(short = "i")] input_path: PathBuf, - /// Input database engine (sled, lmdb or sqlite; limited by db engines + /// Input database engine (lmdb or sqlite; limited by db engines /// enabled in this build) #[structopt(short = "a")] input_engine: Engine, diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs index 63014dbc..a5e2e6e8 100644 --- a/src/garage/cli/structs.rs +++ b/src/garage/cli/structs.rs @@ -531,10 +531,6 @@ pub struct StatsOpt { #[structopt(short = "a", long = "all-nodes")] pub all_nodes: bool, - /// Gather detailed statistics (this can be long) - #[structopt(short = "d", long = "detailed")] - pub detailed: bool, - /// Don't show global cluster stats (internal use in RPC) #[structopt(skip)] #[serde(default)] diff --git a/src/garage/main.rs b/src/garage/main.rs index e489fff0..5e9c061f 100644 --- a/src/garage/main.rs +++ b/src/garage/main.rs @@ -18,8 +18,8 @@ compile_error!("Either bundled-libs or system-libs Cargo feature must be enabled #[cfg(all(feature = "bundled-libs", feature = "system-libs"))] compile_error!("Only one of bundled-libs and system-libs Cargo features must be enabled"); -#[cfg(not(any(feature = "lmdb", feature = "sled", feature = "sqlite")))] -compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb, sled or sqlite."); +#[cfg(not(any(feature = "lmdb", feature = "sqlite")))] +compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb or sqlite."); use std::net::SocketAddr; use std::path::PathBuf; @@ -72,8 +72,6 @@ async fn main() { let features = &[ #[cfg(feature = "k2v")] "k2v", - #[cfg(feature = "sled")] - "sled", #[cfg(feature = "lmdb")] "lmdb", #[cfg(feature = "sqlite")] diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml index 776671d0..a6bcfbe7 100644 --- a/src/model/Cargo.toml +++ b/src/model/Cargo.toml @@ -42,8 +42,7 @@ tokio.workspace = true opentelemetry.workspace = true [features] -default = [ "sled", "lmdb", "sqlite" ] +default = [ "lmdb", "sqlite" ] k2v = [ "garage_util/k2v" ] lmdb = [ "garage_db/lmdb" ] -sled = [ "garage_db/sled" ] sqlite = [ "garage_db/sqlite" ] diff --git a/src/model/garage.rs b/src/model/garage.rs index 7ec8b22e..8987c594 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -118,9 +118,6 @@ impl Garage { .ok_or_message("Invalid `db_engine` value in configuration file")?; let mut db_path = config.metadata_dir.clone(); match db_engine { - db::Engine::Sled => { - db_path.push("db"); - } db::Engine::Sqlite => { db_path.push("db.sqlite"); } @@ -134,8 +131,6 @@ impl Garage { v if v == usize::default() => None, v => Some(v), }, - sled_cache_capacity: config.sled_cache_capacity, - sled_flush_every_ms: config.sled_flush_every_ms, }; let db = db::open_db(&db_path, db_engine, &db_opt) .ok_or_message("Unable to open metadata db")?; diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index 50d4283f..9ecf168c 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -121,13 +121,7 @@ impl Worker for LifecycleWorker { mpu_aborted, .. } => { - let n_objects = self - .garage - .object_table - .data - .store - .fast_len() - .unwrap_or(None); + let n_objects = self.garage.object_table.data.store.len().ok(); let progress = match n_objects { None => "...".to_string(), Some(total) => format!( diff --git a/src/table/data.rs b/src/table/data.rs index 7f6b7847..09f4e008 100644 --- a/src/table/data.rs +++ b/src/table/data.rs @@ -6,7 +6,6 @@ use serde_bytes::ByteBuf; use tokio::sync::Notify; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; use garage_util::data::*; use garage_util::error::*; @@ -36,7 +35,7 @@ pub struct TableData<F: TableSchema, R: TableReplication> { pub(crate) insert_queue: db::Tree, pub(crate) insert_queue_notify: Arc<Notify>, - pub(crate) gc_todo: CountedTree, + pub(crate) gc_todo: db::Tree, pub(crate) metrics: TableMetrics, } @@ -61,7 +60,6 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> { let gc_todo = db .open_tree(format!("{}:gc_todo_v2", F::TABLE_NAME)) .expect("Unable to open GC DB tree"); - let gc_todo = CountedTree::new(gc_todo).expect("Cannot count gc_todo_v2"); let metrics = TableMetrics::new( F::TABLE_NAME, @@ -370,6 +368,6 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> { } pub fn gc_todo_len(&self) -> Result<usize, Error> { - Ok(self.gc_todo.len()) + Ok(self.gc_todo.len()?) } } diff --git a/src/table/gc.rs b/src/table/gc.rs index ef788749..d30a1849 100644 --- a/src/table/gc.rs +++ b/src/table/gc.rs @@ -10,7 +10,7 @@ use serde_bytes::ByteBuf; use futures::future::join_all; use tokio::sync::watch; -use garage_db::counted_tree_hack::CountedTree; +use garage_db as db; use garage_util::background::*; use garage_util::data::*; @@ -334,9 +334,9 @@ impl<F: TableSchema, R: TableReplication> Worker for GcWorker<F, R> { } } -/// An entry stored in the gc_todo Sled tree associated with the table +/// An entry stored in the gc_todo db tree associated with the table /// Contains helper function for parsing, saving, and removing -/// such entry in Sled +/// such entry in the db /// /// Format of an entry: /// - key = 8 bytes: timestamp of tombstone @@ -353,7 +353,7 @@ pub(crate) struct GcTodoEntry { } impl GcTodoEntry { - /// Creates a new GcTodoEntry (not saved in Sled) from its components: + /// Creates a new GcTodoEntry (not saved in the db) from its components: /// the key of an entry in the table, and the hash of the associated /// serialized value pub(crate) fn new(key: Vec<u8>, value_hash: Hash) -> Self { @@ -376,7 +376,7 @@ impl GcTodoEntry { } /// Saves the GcTodoEntry in the gc_todo tree - pub(crate) fn save(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> { + pub(crate) fn save(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> { gc_todo_tree.insert(self.todo_table_key(), self.value_hash.as_slice())?; Ok(()) } @@ -386,12 +386,14 @@ impl GcTodoEntry { /// This is usefull to remove a todo entry only under the condition /// that it has not changed since the time it was read, i.e. /// what we have to do is still the same - pub(crate) fn remove_if_equal(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> { - gc_todo_tree.compare_and_swap::<_, _, &[u8]>( - &self.todo_table_key(), - Some(self.value_hash), - None, - )?; + pub(crate) fn remove_if_equal(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> { + gc_todo_tree.db().transaction(|txn| { + let key = self.todo_table_key(); + if txn.get(gc_todo_tree, &key)?.as_deref() == Some(self.value_hash.as_slice()) { + txn.remove(gc_todo_tree, &key)?; + } + Ok(()) + })?; Ok(()) } diff --git a/src/table/merkle.rs b/src/table/merkle.rs index 01271c58..596d5805 100644 --- a/src/table/merkle.rs +++ b/src/table/merkle.rs @@ -31,14 +31,14 @@ pub struct MerkleUpdater<F: TableSchema, R: TableReplication> { // - value = the hash of the full serialized item, if present, // or an empty vec if item is absent (deleted) // Fields in data: - // pub(crate) merkle_todo: sled::Tree, + // pub(crate) merkle_todo: db::Tree, // pub(crate) merkle_todo_notify: Notify, // Content of the merkle tree: items where // - key = .bytes() for MerkleNodeKey // - value = serialization of a MerkleNode, assumed to be MerkleNode::empty if not found // Field in data: - // pub(crate) merkle_tree: sled::Tree, + // pub(crate) merkle_tree: db::Tree, empty_node_hash: Hash, } @@ -291,10 +291,6 @@ impl<F: TableSchema, R: TableReplication> MerkleUpdater<F, R> { Ok(self.data.merkle_tree.len()?) } - pub fn merkle_tree_fast_len(&self) -> Result<Option<usize>, Error> { - Ok(self.data.merkle_tree.fast_len()?) - } - pub fn todo_len(&self) -> Result<usize, Error> { Ok(self.data.merkle_todo.len()?) } diff --git a/src/table/metrics.rs b/src/table/metrics.rs index 8318a84f..7bb0959a 100644 --- a/src/table/metrics.rs +++ b/src/table/metrics.rs @@ -1,7 +1,6 @@ use opentelemetry::{global, metrics::*, KeyValue}; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; /// TableMetrics reference all counter used for metrics pub struct TableMetrics { @@ -27,7 +26,7 @@ impl TableMetrics { store: db::Tree, merkle_tree: db::Tree, merkle_todo: db::Tree, - gc_todo: CountedTree, + gc_todo: db::Tree, ) -> Self { let meter = global::meter(table_name); TableMetrics { @@ -35,9 +34,9 @@ impl TableMetrics { .u64_value_observer( "table.size", move |observer| { - if let Ok(Some(v)) = store.fast_len() { + if let Ok(value) = store.len() { observer.observe( - v as u64, + value as u64, &[KeyValue::new("table_name", table_name)], ); } @@ -49,9 +48,9 @@ impl TableMetrics { .u64_value_observer( "table.merkle_tree_size", move |observer| { - if let Ok(Some(v)) = merkle_tree.fast_len() { + if let Ok(value) = merkle_tree.len() { observer.observe( - v as u64, + value as u64, &[KeyValue::new("table_name", table_name)], ); } @@ -77,10 +76,12 @@ impl TableMetrics { .u64_value_observer( "table.gc_todo_queue_length", move |observer| { - observer.observe( - gc_todo.len() as u64, - &[KeyValue::new("table_name", table_name)], - ); + if let Ok(value) = gc_todo.len() { + observer.observe( + value as u64, + &[KeyValue::new("table_name", table_name)], + ); + } }, ) .with_description("Table garbage collector TODO queue length") diff --git a/src/util/config.rs b/src/util/config.rs index b7f27676..e243c813 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -87,20 +87,10 @@ pub struct Config { pub kubernetes_discovery: Option<KubernetesDiscoveryConfig>, // -- DB - /// Database engine to use for metadata (options: sled, sqlite, lmdb) + /// Database engine to use for metadata (options: sqlite, lmdb) #[serde(default = "default_db_engine")] pub db_engine: String, - /// Sled cache size, in bytes - #[serde( - deserialize_with = "deserialize_capacity", - default = "default_sled_cache_capacity" - )] - pub sled_cache_capacity: usize, - /// Sled flush interval in milliseconds - #[serde(default = "default_sled_flush_every_ms")] - pub sled_flush_every_ms: u64, - /// LMDB map size #[serde(deserialize_with = "deserialize_capacity", default)] pub lmdb_map_size: usize, @@ -246,13 +236,6 @@ fn default_db_engine() -> String { "lmdb".into() } -fn default_sled_cache_capacity() -> usize { - 128 * 1024 * 1024 -} -fn default_sled_flush_every_ms() -> u64 { - 2000 -} - fn default_block_size() -> usize { 1048576 } |