author     Alex <alex@adnab.me>    2024-03-12 10:45:57 +0000
committer  Alex <alex@adnab.me>    2024-03-12 10:45:57 +0000
commit     81191d2d92e58ff82ace0f4d82b275c157673ade (patch)
tree       53396e8ddbb94c4cc2f7f037a66c12a3c125c22f /src
parent     2128b5febd97eb6c95e3bdd380dcba39b860c0e3 (diff)
parent     2795b53b8b3ebd162df6b0244b73889e72f67ce0 (diff)
Merge pull request 'Remove Sled' (#767) from rm-sled into next-0.10
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/767
Diffstat (limited to 'src')
 src/block/manager.rs             |   7
 src/block/metrics.rs             |  17
 src/block/resync.rs              |  21
 src/db/Cargo.toml                |   3
 src/db/counted_tree_hack.rs      | 127
 src/db/lib.rs                    |  18
 src/db/lmdb_adapter.rs           |  90
 src/db/open.rs                   |  27
 src/db/sled_adapter.rs           | 274
 src/db/sqlite_adapter.rs         | 184
 src/db/test.rs                   |  60
 src/garage/Cargo.toml            |   5
 src/garage/admin/mod.rs          |  51
 src/garage/cli/convert_db.rs     |   2
 src/garage/cli/structs.rs        |   4
 src/garage/main.rs               |   6
 src/model/Cargo.toml             |   3
 src/model/garage.rs              |   5
 src/model/s3/lifecycle_worker.rs |   8
 src/table/data.rs                |   6
 src/table/gc.rs                  |  24
 src/table/merkle.rs              |   8
 src/table/metrics.rs             |  21
 src/util/config.rs               |  19
 24 files changed, 328 insertions(+), 662 deletions(-)
diff --git a/src/block/manager.rs b/src/block/manager.rs
index c7e4df17..18fadf85 100644
--- a/src/block/manager.rs
+++ b/src/block/manager.rs
@@ -378,11 +378,6 @@ impl BlockManager {
Ok(self.rc.rc.len()?)
}
- /// Get number of items in the refcount table
- pub fn rc_fast_len(&self) -> Result<Option<usize>, Error> {
- Ok(self.rc.rc.fast_len()?)
- }
-
/// Send command to start/stop/manager scrub worker
pub async fn send_scrub_command(&self, cmd: ScrubWorkerCommand) -> Result<(), Error> {
let tx = self.tx_scrub_command.load();
@@ -398,7 +393,7 @@ impl BlockManager {
/// List all resync errors
pub fn list_resync_errors(&self) -> Result<Vec<BlockResyncErrorInfo>, Error> {
- let mut blocks = Vec::with_capacity(self.resync.errors.len());
+ let mut blocks = Vec::with_capacity(self.resync.errors.len()?);
for ent in self.resync.errors.iter()? {
let (hash, cnt) = ent?;
let cnt = ErrorCounter::decode(&cnt);
diff --git a/src/block/metrics.rs b/src/block/metrics.rs
index 6659df32..8e10afdf 100644
--- a/src/block/metrics.rs
+++ b/src/block/metrics.rs
@@ -1,7 +1,6 @@
use opentelemetry::{global, metrics::*};
use garage_db as db;
-use garage_db::counted_tree_hack::CountedTree;
/// TableMetrics reference all counter used for metrics
pub struct BlockManagerMetrics {
@@ -29,8 +28,8 @@ impl BlockManagerMetrics {
pub fn new(
compression_level: Option<i32>,
rc_tree: db::Tree,
- resync_queue: CountedTree,
- resync_errors: CountedTree,
+ resync_queue: db::Tree,
+ resync_errors: db::Tree,
) -> Self {
let meter = global::meter("garage_model/block");
Self {
@@ -45,15 +44,17 @@ impl BlockManagerMetrics {
.init(),
_rc_size: meter
.u64_value_observer("block.rc_size", move |observer| {
- if let Ok(Some(v)) = rc_tree.fast_len() {
- observer.observe(v as u64, &[])
+ if let Ok(value) = rc_tree.len() {
+ observer.observe(value as u64, &[])
}
})
.with_description("Number of blocks known to the reference counter")
.init(),
_resync_queue_len: meter
.u64_value_observer("block.resync_queue_length", move |observer| {
- observer.observe(resync_queue.len() as u64, &[])
+ if let Ok(value) = resync_queue.len() {
+ observer.observe(value as u64, &[]);
+ }
})
.with_description(
"Number of block hashes queued for local check and possible resync",
@@ -61,7 +62,9 @@ impl BlockManagerMetrics {
.init(),
_resync_errored_blocks: meter
.u64_value_observer("block.resync_errored_blocks", move |observer| {
- observer.observe(resync_errors.len() as u64, &[])
+ if let Ok(value) = resync_errors.len() {
+ observer.observe(value as u64, &[]);
+ }
})
.with_description("Number of block hashes whose last resync resulted in an error")
.init(),
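
A side note on the observer changes above: with CountedTree gone, queue lengths are read straight from the underlying db::Tree, whose len() returns a Result, so a measurement is only reported when the call succeeds. A minimal, self-contained sketch of that pattern (the Tree type below is a hypothetical stand-in for garage_db::Tree, not the real type):

    // Stand-in for garage_db::Tree: len() is fallible, like the real backends.
    struct Tree;

    impl Tree {
        fn len(&self) -> Result<usize, String> {
            Ok(42) // an LMDB or Sqlite backend could return Err here
        }
    }

    fn observe_len(tree: &Tree, observe: impl Fn(u64)) {
        // Before this patch: if let Ok(Some(v)) = tree.fast_len() { ... }
        // After: report only when the single fallible len() call succeeds.
        if let Ok(value) = tree.len() {
            observe(value as u64);
        }
    }

    fn main() {
        let tree = Tree;
        observe_len(&tree, |v| println!("block.resync_queue_length = {}", v));
    }
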
diff --git a/src/block/resync.rs b/src/block/resync.rs
index 15f210e4..48c2cef1 100644
--- a/src/block/resync.rs
+++ b/src/block/resync.rs
@@ -15,7 +15,6 @@ use opentelemetry::{
};
use garage_db as db;
-use garage_db::counted_tree_hack::CountedTree;
use garage_util::background::*;
use garage_util::data::*;
@@ -47,9 +46,9 @@ pub(crate) const MAX_RESYNC_WORKERS: usize = 8;
const INITIAL_RESYNC_TRANQUILITY: u32 = 2;
pub struct BlockResyncManager {
- pub(crate) queue: CountedTree,
+ pub(crate) queue: db::Tree,
pub(crate) notify: Arc<Notify>,
- pub(crate) errors: CountedTree,
+ pub(crate) errors: db::Tree,
busy_set: BusySet,
@@ -90,12 +89,10 @@ impl BlockResyncManager {
let queue = db
.open_tree("block_local_resync_queue")
.expect("Unable to open block_local_resync_queue tree");
- let queue = CountedTree::new(queue).expect("Could not count block_local_resync_queue");
let errors = db
.open_tree("block_local_resync_errors")
.expect("Unable to open block_local_resync_errors tree");
- let errors = CountedTree::new(errors).expect("Could not count block_local_resync_errors");
let persister = PersisterShared::new(&system.metadata_dir, "resync_cfg");
@@ -110,16 +107,12 @@ impl BlockResyncManager {
/// Get length of resync queue
pub fn queue_len(&self) -> Result<usize, Error> {
- // This currently can't return an error because the CountedTree hack
- // doesn't error on .len(), but this will change when we remove the hack
- // (hopefully someday!)
- Ok(self.queue.len())
+ Ok(self.queue.len()?)
}
/// Get number of blocks that have an error
pub fn errors_len(&self) -> Result<usize, Error> {
- // (see queue_len comment)
- Ok(self.errors.len())
+ Ok(self.errors.len()?)
}
/// Clear the error counter for a block and put it in queue immediately
@@ -180,7 +173,7 @@ impl BlockResyncManager {
// deleted once the garbage collection delay has passed.
//
// Here are some explanations on how the resync queue works.
- // There are two Sled trees that are used to have information
+ // There are two db trees that are used to have information
// about the status of blocks that need to be resynchronized:
//
// - resync.queue: a tree that is ordered first by a timestamp
@@ -541,9 +534,9 @@ impl Worker for ResyncWorker {
Ok(WorkerState::Idle)
}
Err(e) => {
- // The errors that we have here are only Sled errors
+ // The errors that we have here are only db errors
// We don't really know how to handle them so just ¯\_(ツ)_/¯
- // (there is kind of an assumption that Sled won't error on us,
+ // (there is kind of an assumption that the db won't error on us,
// if it does there is not much we can do -- TODO should we just panic?)
// Here we just give the error to the worker manager,
// it will print it to the logs and increment a counter
diff --git a/src/db/Cargo.toml b/src/db/Cargo.toml
index fddc5cca..a8f6d586 100644
--- a/src/db/Cargo.toml
+++ b/src/db/Cargo.toml
@@ -18,13 +18,12 @@ tracing.workspace = true
heed = { workspace = true, optional = true }
rusqlite = { workspace = true, optional = true }
-sled = { workspace = true, optional = true }
[dev-dependencies]
mktemp.workspace = true
[features]
-default = [ "sled", "lmdb", "sqlite" ]
+default = [ "lmdb", "sqlite" ]
bundled-libs = [ "rusqlite?/bundled" ]
lmdb = [ "heed" ]
sqlite = [ "rusqlite" ]
diff --git a/src/db/counted_tree_hack.rs b/src/db/counted_tree_hack.rs
deleted file mode 100644
index a4ce12e0..00000000
--- a/src/db/counted_tree_hack.rs
+++ /dev/null
@@ -1,127 +0,0 @@
-//! This hack allows a db tree to keep in RAM a counter of the number of entries
-//! it contains, which is used to call .len() on it. This is usefull only for
-//! the sled backend where .len() otherwise would have to traverse the whole
-//! tree to count items. For sqlite and lmdb, this is mostly useless (but
-//! hopefully not harmfull!). Note that a CountedTree cannot be part of a
-//! transaction.
-
-use std::sync::{
- atomic::{AtomicUsize, Ordering},
- Arc,
-};
-
-use crate::{Result, Tree, TxError, Value, ValueIter};
-
-#[derive(Clone)]
-pub struct CountedTree(Arc<CountedTreeInternal>);
-
-struct CountedTreeInternal {
- tree: Tree,
- len: AtomicUsize,
-}
-
-impl CountedTree {
- pub fn new(tree: Tree) -> Result<Self> {
- let len = tree.len()?;
- Ok(Self(Arc::new(CountedTreeInternal {
- tree,
- len: AtomicUsize::new(len),
- })))
- }
-
- pub fn len(&self) -> usize {
- self.0.len.load(Ordering::SeqCst)
- }
-
- pub fn is_empty(&self) -> bool {
- self.len() == 0
- }
-
- pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> {
- self.0.tree.get(key)
- }
-
- pub fn first(&self) -> Result<Option<(Value, Value)>> {
- self.0.tree.first()
- }
-
- pub fn iter(&self) -> Result<ValueIter<'_>> {
- self.0.tree.iter()
- }
-
- // ---- writing functions ----
-
- pub fn insert<K, V>(&self, key: K, value: V) -> Result<Option<Value>>
- where
- K: AsRef<[u8]>,
- V: AsRef<[u8]>,
- {
- let old_val = self.0.tree.insert(key, value)?;
- if old_val.is_none() {
- self.0.len.fetch_add(1, Ordering::SeqCst);
- }
- Ok(old_val)
- }
-
- pub fn remove<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> {
- let old_val = self.0.tree.remove(key)?;
- if old_val.is_some() {
- self.0.len.fetch_sub(1, Ordering::SeqCst);
- }
- Ok(old_val)
- }
-
- pub fn compare_and_swap<K, OV, NV>(
- &self,
- key: K,
- expected_old: Option<OV>,
- new: Option<NV>,
- ) -> Result<bool>
- where
- K: AsRef<[u8]>,
- OV: AsRef<[u8]>,
- NV: AsRef<[u8]>,
- {
- let old_some = expected_old.is_some();
- let new_some = new.is_some();
-
- let tx_res = self.0.tree.db().transaction(|tx| {
- let old_val = tx.get(&self.0.tree, &key)?;
- let is_same = match (&old_val, &expected_old) {
- (None, None) => true,
- (Some(x), Some(y)) if x == y.as_ref() => true,
- _ => false,
- };
- if is_same {
- match &new {
- Some(v) => {
- tx.insert(&self.0.tree, &key, v)?;
- }
- None => {
- tx.remove(&self.0.tree, &key)?;
- }
- }
- Ok(())
- } else {
- Err(TxError::Abort(()))
- }
- });
-
- match tx_res {
- Ok(()) => {
- match (old_some, new_some) {
- (false, true) => {
- self.0.len.fetch_add(1, Ordering::SeqCst);
- }
- (true, false) => {
- self.0.len.fetch_sub(1, Ordering::SeqCst);
- }
- _ => (),
- }
- Ok(true)
- }
- Err(TxError::Abort(())) => Ok(false),
- Err(TxError::Db(e)) => Err(e),
- }
- }
-}
diff --git a/src/db/lib.rs b/src/db/lib.rs
index 0fb457ce..ff511b5f 100644
--- a/src/db/lib.rs
+++ b/src/db/lib.rs
@@ -3,13 +3,9 @@ extern crate tracing;
#[cfg(feature = "lmdb")]
pub mod lmdb_adapter;
-#[cfg(feature = "sled")]
-pub mod sled_adapter;
#[cfg(feature = "sqlite")]
pub mod sqlite_adapter;
-pub mod counted_tree_hack;
-
pub mod open;
#[cfg(test)]
@@ -55,6 +51,7 @@ pub type Result<T> = std::result::Result<T, Error>;
pub struct TxOpError(pub(crate) Error);
pub type TxOpResult<T> = std::result::Result<T, TxOpError>;
+#[derive(Debug)]
pub enum TxError<E> {
Abort(E),
Db(Error),
@@ -189,10 +186,6 @@ impl Tree {
pub fn len(&self) -> Result<usize> {
self.0.len(self.1)
}
- #[inline]
- pub fn fast_len(&self) -> Result<Option<usize>> {
- self.0.fast_len(self.1)
- }
#[inline]
pub fn first(&self) -> Result<Option<(Value, Value)>> {
@@ -282,6 +275,11 @@ impl<'a> Transaction<'a> {
pub fn remove<T: AsRef<[u8]>>(&mut self, tree: &Tree, key: T) -> TxOpResult<Option<Value>> {
self.tx.remove(tree.1, key.as_ref())
}
+ /// Clears all values in a tree
+ #[inline]
+ pub fn clear(&mut self, tree: &Tree) -> TxOpResult<()> {
+ self.tx.clear(tree.1)
+ }
#[inline]
pub fn iter(&self, tree: &Tree) -> TxOpResult<TxValueIter<'_>> {
@@ -328,9 +326,6 @@ pub(crate) trait IDb: Send + Sync {
fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
fn len(&self, tree: usize) -> Result<usize>;
- fn fast_len(&self, _tree: usize) -> Result<Option<usize>> {
- Ok(None)
- }
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>>;
fn remove(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
@@ -361,6 +356,7 @@ pub(crate) trait ITx {
fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>>;
fn remove(&mut self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>>;
+ fn clear(&mut self, tree: usize) -> TxOpResult<()>;
fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>>;
fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>>;
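
The lib.rs additions above give transactions a clear() operation and declare in-transaction iterators, which the LMDB and Sqlite adapters below now actually implement. A condensed usage sketch modelled on the test suite later in this diff; clear_tree is a hypothetical helper and it assumes an already-open garage_db Db and one of its trees:

    use garage_db as db;

    // Hypothetical helper: wipe a tree atomically and check it is empty,
    // using the transaction API extended by this patch.
    fn clear_tree(database: &db::Db, tree: &db::Tree) {
        database
            .transaction::<_, (), _>(|tx| {
                // New in this patch: clear all values of a tree inside a transaction.
                tx.clear(tree).unwrap();
                // In-transaction iteration is now implemented for LMDB and Sqlite.
                assert!(tx.iter(tree).unwrap().next().is_none());
                Ok(())
            })
            .unwrap();
    }
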
diff --git a/src/db/lmdb_adapter.rs b/src/db/lmdb_adapter.rs
index 59fa132d..5ce7d3e3 100644
--- a/src/db/lmdb_adapter.rs
+++ b/src/db/lmdb_adapter.rs
@@ -3,6 +3,7 @@ use core::ptr::NonNull;
use std::collections::HashMap;
use std::convert::TryInto;
+use std::pin::Pin;
use std::sync::{Arc, RwLock};
use heed::types::ByteSlice;
@@ -121,10 +122,6 @@ impl IDb for LmdbDb {
Ok(tree.len(&tx)?.try_into().unwrap())
}
- fn fast_len(&self, tree: usize) -> Result<Option<usize>> {
- Ok(Some(self.len(tree)?))
- }
-
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> {
let tree = self.get_tree(tree)?;
let mut tx = self.db.write_txn()?;
@@ -242,8 +239,9 @@ impl<'a> ITx for LmdbTx<'a> {
None => Ok(None),
}
}
- fn len(&self, _tree: usize) -> TxOpResult<usize> {
- unimplemented!(".len() in transaction not supported with LMDB backend")
+ fn len(&self, tree: usize) -> TxOpResult<usize> {
+ let tree = self.get_tree(tree)?;
+ Ok(tree.len(&self.tx)? as usize)
}
fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>> {
@@ -258,33 +256,48 @@ impl<'a> ITx for LmdbTx<'a> {
tree.delete(&mut self.tx, key)?;
Ok(old_val)
}
+ fn clear(&mut self, tree: usize) -> TxOpResult<()> {
+ let tree = *self.get_tree(tree)?;
+ tree.clear(&mut self.tx)?;
+ Ok(())
+ }
- fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!("Iterators in transactions not supported with LMDB backend");
+ fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
+ let tree = *self.get_tree(tree)?;
+ Ok(Box::new(tree.iter(&self.tx)?.map(tx_iter_item)))
}
- fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!("Iterators in transactions not supported with LMDB backend");
+ fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
+ let tree = *self.get_tree(tree)?;
+ Ok(Box::new(tree.rev_iter(&self.tx)?.map(tx_iter_item)))
}
fn range<'r>(
&self,
- _tree: usize,
- _low: Bound<&'r [u8]>,
- _high: Bound<&'r [u8]>,
+ tree: usize,
+ low: Bound<&'r [u8]>,
+ high: Bound<&'r [u8]>,
) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!("Iterators in transactions not supported with LMDB backend");
+ let tree = *self.get_tree(tree)?;
+ Ok(Box::new(
+ tree.range(&self.tx, &(low, high))?.map(tx_iter_item),
+ ))
}
fn range_rev<'r>(
&self,
- _tree: usize,
- _low: Bound<&'r [u8]>,
- _high: Bound<&'r [u8]>,
+ tree: usize,
+ low: Bound<&'r [u8]>,
+ high: Bound<&'r [u8]>,
) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!("Iterators in transactions not supported with LMDB backend");
+ let tree = *self.get_tree(tree)?;
+ Ok(Box::new(
+ tree.rev_range(&self.tx, &(low, high))?.map(tx_iter_item),
+ ))
}
}
-// ----
+// ---- iterators outside transactions ----
+// complicated, they must hold the transaction object
+// therefore a bit of unsafe code (it is a self-referential struct)
type IteratorItem<'a> = heed::Result<(
<ByteSlice as BytesDecode<'a>>::DItem,
@@ -307,12 +320,20 @@ where
where
F: FnOnce(&'a RoTxn<'a>) -> Result<I>,
{
- let mut res = TxAndIterator { tx, iter: None };
+ let res = TxAndIterator { tx, iter: None };
+ let mut boxed = Box::pin(res);
+
+ // This unsafe allows us to bypass lifetime checks
+ let tx = unsafe { NonNull::from(&boxed.tx).as_ref() };
+ let iter = iterfun(tx)?;
- let tx = unsafe { NonNull::from(&res.tx).as_ref() };
- res.iter = Some(iterfun(tx)?);
+ let mut_ref = Pin::as_mut(&mut boxed);
+ // This unsafe allows us to write in a field of the pinned struct
+ unsafe {
+ Pin::get_unchecked_mut(mut_ref).iter = Some(iter);
+ }
- Ok(Box::new(res))
+ Ok(Box::new(TxAndIteratorPin(boxed)))
}
}
@@ -321,18 +342,26 @@ where
I: Iterator<Item = IteratorItem<'a>> + 'a,
{
fn drop(&mut self) {
+ // ensure the iterator is dropped before the RoTxn it references
drop(self.iter.take());
}
}
-impl<'a, I> Iterator for TxAndIterator<'a, I>
+struct TxAndIteratorPin<'a, I>(Pin<Box<TxAndIterator<'a, I>>>)
+where
+ I: Iterator<Item = IteratorItem<'a>> + 'a;
+
+impl<'a, I> Iterator for TxAndIteratorPin<'a, I>
where
I: Iterator<Item = IteratorItem<'a>> + 'a,
{
type Item = Result<(Value, Value)>;
fn next(&mut self) -> Option<Self::Item> {
- match self.iter.as_mut().unwrap().next() {
+ let mut_ref = Pin::as_mut(&mut self.0);
+ // This unsafe allows us to mutably access the iterator field
+ let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() };
+ match next {
None => None,
Some(Err(e)) => Some(Err(e.into())),
Some(Ok((k, v))) => Some(Ok((k.to_vec(), v.to_vec()))),
@@ -340,7 +369,16 @@ where
}
}
-// ----
+// ---- iterators within transactions ----
+
+fn tx_iter_item<'a>(
+ item: std::result::Result<(&'a [u8], &'a [u8]), heed::Error>,
+) -> TxOpResult<(Vec<u8>, Vec<u8>)> {
+ item.map(|(k, v)| (k.to_vec(), v.to_vec()))
+ .map_err(|e| TxOpError(Error::from(e)))
+}
+
+// ---- utility ----
#[cfg(target_pointer_width = "64")]
pub fn recommended_map_size() -> usize {
diff --git a/src/db/open.rs b/src/db/open.rs
index ae135c4e..03476a42 100644
--- a/src/db/open.rs
+++ b/src/db/open.rs
@@ -11,7 +11,6 @@ use crate::{Db, Error, Result};
pub enum Engine {
Lmdb,
Sqlite,
- Sled,
}
impl Engine {
@@ -20,7 +19,6 @@ impl Engine {
match self {
Self::Lmdb => "lmdb",
Self::Sqlite => "sqlite",
- Self::Sled => "sled",
}
}
}
@@ -38,10 +36,10 @@ impl std::str::FromStr for Engine {
match text {
"lmdb" | "heed" => Ok(Self::Lmdb),
"sqlite" | "sqlite3" | "rusqlite" => Ok(Self::Sqlite),
- "sled" => Ok(Self::Sled),
+ "sled" => Err(Error("Sled is no longer supported as a database engine. Converting your old metadata db can be done using an older Garage binary (e.g. v0.9.3).".into())),
kind => Err(Error(
format!(
- "Invalid DB engine: {} (options are: lmdb, sled, sqlite)",
+ "Invalid DB engine: {} (options are: lmdb, sqlite)",
kind
)
.into(),
@@ -53,8 +51,6 @@ impl std::str::FromStr for Engine {
pub struct OpenOpt {
pub fsync: bool,
pub lmdb_map_size: Option<usize>,
- pub sled_cache_capacity: usize,
- pub sled_flush_every_ms: u64,
}
impl Default for OpenOpt {
@@ -62,31 +58,12 @@ impl Default for OpenOpt {
Self {
fsync: false,
lmdb_map_size: None,
- sled_cache_capacity: 1024 * 1024 * 1024,
- sled_flush_every_ms: 2000,
}
}
}
pub fn open_db(path: &PathBuf, engine: Engine, opt: &OpenOpt) -> Result<Db> {
match engine {
- // ---- Sled DB ----
- #[cfg(feature = "sled")]
- Engine::Sled => {
- if opt.fsync {
- return Err(Error(
- "`metadata_fsync = true` is not supported with the Sled database engine".into(),
- ));
- }
- info!("Opening Sled database at: {}", path.display());
- let db = crate::sled_adapter::sled::Config::default()
- .path(&path)
- .cache_capacity(opt.sled_cache_capacity as u64)
- .flush_every_ms(Some(opt.sled_flush_every_ms))
- .open()?;
- Ok(crate::sled_adapter::SledDb::init(db))
- }
-
// ---- Sqlite DB ----
#[cfg(feature = "sqlite")]
Engine::Sqlite => {
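
With the Sled variant removed from Engine, parsing the engine name now fails explicitly for "sled" and directs users to convert their metadata db with an older Garage binary. A small sketch of the resulting parsing behaviour, assuming the garage_db::open::Engine type as modified above:

    use std::str::FromStr;

    use garage_db::open::Engine;

    fn main() {
        // Still-supported engines and their aliases, per the match above.
        assert!(Engine::from_str("lmdb").is_ok());
        assert!(Engine::from_str("heed").is_ok());
        assert!(Engine::from_str("sqlite3").is_ok());

        // "sled" is rejected with a message directing users to convert their
        // metadata db using an older Garage binary (e.g. v0.9.3).
        assert!(Engine::from_str("sled").is_err());

        // Unknown names get the updated options list: lmdb, sqlite.
        assert!(Engine::from_str("rocksdb").is_err());
    }
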
diff --git a/src/db/sled_adapter.rs b/src/db/sled_adapter.rs
deleted file mode 100644
index 84f2001b..00000000
--- a/src/db/sled_adapter.rs
+++ /dev/null
@@ -1,274 +0,0 @@
-use core::ops::Bound;
-
-use std::cell::Cell;
-use std::collections::HashMap;
-use std::sync::{Arc, RwLock};
-
-use sled::transaction::{
- ConflictableTransactionError, TransactionError, Transactional, TransactionalTree,
- UnabortableTransactionError,
-};
-
-use crate::{
- Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult,
- TxResult, TxValueIter, Value, ValueIter,
-};
-
-pub use sled;
-
-// -- err
-
-impl From<sled::Error> for Error {
- fn from(e: sled::Error) -> Error {
- Error(format!("Sled: {}", e).into())
- }
-}
-
-impl From<sled::Error> for TxOpError {
- fn from(e: sled::Error) -> TxOpError {
- TxOpError(e.into())
- }
-}
-
-// -- db
-
-pub struct SledDb {
- db: sled::Db,
- trees: RwLock<(Vec<sled::Tree>, HashMap<String, usize>)>,
-}
-
-impl SledDb {
- #[deprecated(
- since = "0.9.0",
- note = "The Sled database is now deprecated and will be removed in Garage v1.0. Please migrate to LMDB or Sqlite as soon as possible."
- )]
- pub fn init(db: sled::Db) -> Db {
- tracing::warn!("-------------------- IMPORTANT WARNING !!! ----------------------");
- tracing::warn!("The Sled database is now deprecated and will be removed in Garage v1.0.");
- tracing::warn!("Please migrate to LMDB or Sqlite as soon as possible.");
- tracing::warn!("-----------------------------------------------------------------------");
- let s = Self {
- db,
- trees: RwLock::new((Vec::new(), HashMap::new())),
- };
- Db(Arc::new(s))
- }
-
- fn get_tree(&self, i: usize) -> Result<sled::Tree> {
- self.trees
- .read()
- .unwrap()
- .0
- .get(i)
- .cloned()
- .ok_or_else(|| Error("invalid tree id".into()))
- }
-}
-
-impl IDb for SledDb {
- fn engine(&self) -> String {
- "Sled".into()
- }
-
- fn open_tree(&self, name: &str) -> Result<usize> {
- let mut trees = self.trees.write().unwrap();
- if let Some(i) = trees.1.get(name) {
- Ok(*i)
- } else {
- let tree = self.db.open_tree(name)?;
- let i = trees.0.len();
- trees.0.push(tree);
- trees.1.insert(name.to_string(), i);
- Ok(i)
- }
- }
-
- fn list_trees(&self) -> Result<Vec<String>> {
- let mut trees = vec![];
- for name in self.db.tree_names() {
- let name = std::str::from_utf8(&name)
- .map_err(|e| Error(format!("{}", e).into()))?
- .to_string();
- if name != "__sled__default" {
- trees.push(name);
- }
- }
- Ok(trees)
- }
-
- // ----
-
- fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>> {
- let tree = self.get_tree(tree)?;
- let val = tree.get(key)?;
- Ok(val.map(|x| x.to_vec()))
- }
-
- fn len(&self, tree: usize) -> Result<usize> {
- let tree = self.get_tree(tree)?;
- Ok(tree.len())
- }
-
- fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> {
- let tree = self.get_tree(tree)?;
- let old_val = tree.insert(key, value)?;
- Ok(old_val.map(|x| x.to_vec()))
- }
-
- fn remove(&self, tree: usize, key: &[u8]) -> Result<Option<Value>> {
- let tree = self.get_tree(tree)?;
- let old_val = tree.remove(key)?;
- Ok(old_val.map(|x| x.to_vec()))
- }
-
- fn clear(&self, tree: usize) -> Result<()> {
- let tree = self.get_tree(tree)?;
- tree.clear()?;
- Ok(())
- }
-
- fn iter(&self, tree: usize) -> Result<ValueIter<'_>> {
- let tree = self.get_tree(tree)?;
- Ok(Box::new(tree.iter().map(|v| {
- v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into)
- })))
- }
-
- fn iter_rev(&self, tree: usize) -> Result<ValueIter<'_>> {
- let tree = self.get_tree(tree)?;
- Ok(Box::new(tree.iter().rev().map(|v| {
- v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into)
- })))
- }
-
- fn range<'r>(
- &self,
- tree: usize,
- low: Bound<&'r [u8]>,
- high: Bound<&'r [u8]>,
- ) -> Result<ValueIter<'_>> {
- let tree = self.get_tree(tree)?;
- Ok(Box::new(tree.range::<&'r [u8], _>((low, high)).map(|v| {
- v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into)
- })))
- }
- fn range_rev<'r>(
- &self,
- tree: usize,
- low: Bound<&'r [u8]>,
- high: Bound<&'r [u8]>,
- ) -> Result<ValueIter<'_>> {
- let tree = self.get_tree(tree)?;
- Ok(Box::new(tree.range::<&'r [u8], _>((low, high)).rev().map(
- |v| v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into),
- )))
- }
-
- // ----
-
- fn transaction(&self, f: &dyn ITxFn) -> TxResult<OnCommit, ()> {
- let trees = self.trees.read().unwrap();
- let res = trees.0.transaction(|txtrees| {
- let mut tx = SledTx {
- trees: txtrees,
- err: Cell::new(None),
- };
- match f.try_on(&mut tx) {
- TxFnResult::Ok(on_commit) => {
- assert!(tx.err.into_inner().is_none());
- Ok(on_commit)
- }
- TxFnResult::Abort => {
- assert!(tx.err.into_inner().is_none());
- Err(ConflictableTransactionError::Abort(()))
- }
- TxFnResult::DbErr => {
- let e = tx.err.into_inner().expect("No DB error");
- Err(e.into())
- }
- }
- });
- match res {
- Ok(on_commit) => Ok(on_commit),
- Err(TransactionError::Abort(())) => Err(TxError::Abort(())),
- Err(TransactionError::Storage(s)) => Err(TxError::Db(s.into())),
- }
- }
-}
-
-// ----
-
-struct SledTx<'a> {
- trees: &'a [TransactionalTree],
- err: Cell<Option<UnabortableTransactionError>>,
-}
-
-impl<'a> SledTx<'a> {
- fn get_tree(&self, i: usize) -> TxOpResult<&TransactionalTree> {
- self.trees.get(i).ok_or_else(|| {
- TxOpError(Error(
- "invalid tree id (it might have been openned after the transaction started)".into(),
- ))
- })
- }
-
- fn save_error<R>(
- &self,
- v: std::result::Result<R, UnabortableTransactionError>,
- ) -> TxOpResult<R> {
- match v {
- Ok(x) => Ok(x),
- Err(e) => {
- let txt = format!("{}", e);
- self.err.set(Some(e));
- Err(TxOpError(Error(txt.into())))
- }
- }
- }
-}
-
-impl<'a> ITx for SledTx<'a> {
- fn get(&self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>> {
- let tree = self.get_tree(tree)?;
- let tmp = self.save_error(tree.get(key))?;
- Ok(tmp.map(|x| x.to_vec()))
- }
- fn len(&self, _tree: usize) -> TxOpResult<usize> {
- unimplemented!(".len() in transaction not supported with Sled backend")
- }
-
- fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>> {
- let tree = self.get_tree(tree)?;
- let old_val = self.save_error(tree.insert(key, value))?;
- Ok(old_val.map(|x| x.to_vec()))
- }
- fn remove(&mut self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>> {
- let tree = self.get_tree(tree)?;
- let old_val = self.save_error(tree.remove(key))?;
- Ok(old_val.map(|x| x.to_vec()))
- }
-
- fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!("Iterators in transactions not supported with Sled backend");
- }
- fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!("Iterators in transactions not supported with Sled backend");
- }
-
- fn range<'r>(
- &self,
- _tree: usize,
- _low: Bound<&'r [u8]>,
- _high: Bound<&'r [u8]>,
- ) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!("Iterators in transactions not supported with Sled backend");
- }
- fn range_rev<'r>(
- &self,
- _tree: usize,
- _low: Bound<&'r [u8]>,
- _high: Bound<&'r [u8]>,
- ) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!("Iterators in transactions not supported with Sled backend");
- }
-}
diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs
index 9f967c66..6c556c97 100644
--- a/src/db/sqlite_adapter.rs
+++ b/src/db/sqlite_adapter.rs
@@ -144,10 +144,6 @@ impl IDb for SqliteDb {
}
}
- fn fast_len(&self, tree: usize) -> Result<Option<usize>> {
- Ok(Some(self.len(tree)?))
- }
-
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> {
trace!("insert {}: lock db", tree);
let this = self.0.lock().unwrap();
@@ -367,33 +363,64 @@ impl<'a> ITx for SqliteTx<'a> {
Ok(old_val)
}
+ fn clear(&mut self, tree: usize) -> TxOpResult<()> {
+ let tree = self.get_tree(tree)?;
+ self.tx.execute(&format!("DELETE FROM {}", tree), [])?;
+ Ok(())
+ }
- fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!();
+ fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
+ let tree = self.get_tree(tree)?;
+ let sql = format!("SELECT k, v FROM {} ORDER BY k ASC", tree);
+ TxValueIterator::make(self, &sql, [])
}
- fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!();
+ fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
+ let tree = self.get_tree(tree)?;
+ let sql = format!("SELECT k, v FROM {} ORDER BY k DESC", tree);
+ TxValueIterator::make(self, &sql, [])
}
fn range<'r>(
&self,
- _tree: usize,
- _low: Bound<&'r [u8]>,
- _high: Bound<&'r [u8]>,
+ tree: usize,
+ low: Bound<&'r [u8]>,
+ high: Bound<&'r [u8]>,
) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!();
+ let tree = self.get_tree(tree)?;
+
+ let (bounds_sql, params) = bounds_sql(low, high);
+ let sql = format!("SELECT k, v FROM {} {} ORDER BY k ASC", tree, bounds_sql);
+
+ let params = params
+ .iter()
+ .map(|x| x as &dyn rusqlite::ToSql)
+ .collect::<Vec<_>>();
+
+ TxValueIterator::make::<&[&dyn rusqlite::ToSql]>(self, &sql, params.as_ref())
}
fn range_rev<'r>(
&self,
- _tree: usize,
- _low: Bound<&'r [u8]>,
- _high: Bound<&'r [u8]>,
+ tree: usize,
+ low: Bound<&'r [u8]>,
+ high: Bound<&'r [u8]>,
) -> TxOpResult<TxValueIter<'_>> {
- unimplemented!();
+ let tree = self.get_tree(tree)?;
+
+ let (bounds_sql, params) = bounds_sql(low, high);
+ let sql = format!("SELECT k, v FROM {} {} ORDER BY k DESC", tree, bounds_sql);
+
+ let params = params
+ .iter()
+ .map(|x| x as &dyn rusqlite::ToSql)
+ .collect::<Vec<_>>();
+
+ TxValueIterator::make::<&[&dyn rusqlite::ToSql]>(self, &sql, params.as_ref())
}
}
-// ----
+// ---- iterators outside transactions ----
+// complicated, they must hold the Statement and Row objects
+// therefore quite some unsafe code (it is a self-referential struct)
struct DbValueIterator<'a> {
db: MutexGuard<'a, SqliteDbInner>,
@@ -417,17 +444,23 @@ impl<'a> DbValueIterator<'a> {
let mut boxed = Box::pin(res);
trace!("make iterator with sql: {}", sql);
- unsafe {
- let db = NonNull::from(&boxed.db);
- let stmt = db.as_ref().db.prepare(sql)?;
+ // This unsafe allows us to bypass lifetime checks
+ let db = unsafe { NonNull::from(&boxed.db).as_ref() };
+ let stmt = db.db.prepare(sql)?;
- let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed);
+ let mut_ref = Pin::as_mut(&mut boxed);
+ // This unsafe allows us to write in a field of the pinned struct
+ unsafe {
Pin::get_unchecked_mut(mut_ref).stmt = Some(stmt);
+ }
- let mut stmt = NonNull::from(&boxed.stmt);
- let iter = stmt.as_mut().as_mut().unwrap().query(args)?;
+ // This unsafe allows us to bypass lifetime checks
+ let stmt = unsafe { NonNull::from(&boxed.stmt).as_mut() };
+ let iter = stmt.as_mut().unwrap().query(args)?;
- let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed);
+ let mut_ref = Pin::as_mut(&mut boxed);
+ // This unsafe allows us to write in a field of the pinned struct
+ unsafe {
Pin::get_unchecked_mut(mut_ref).iter = Some(iter);
}
@@ -449,28 +482,73 @@ impl<'a> Iterator for DbValueIteratorPin<'a> {
type Item = Result<(Value, Value)>;
fn next(&mut self) -> Option<Self::Item> {
- let next = unsafe {
- let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut self.0);
- Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next()
- };
- let row = match next {
- Err(e) => return Some(Err(e.into())),
- Ok(None) => return None,
- Ok(Some(r)) => r,
- };
- let k = match row.get::<_, Vec<u8>>(0) {
- Err(e) => return Some(Err(e.into())),
- Ok(x) => x,
- };
- let v = match row.get::<_, Vec<u8>>(1) {
- Err(e) => return Some(Err(e.into())),
- Ok(y) => y,
+ let mut_ref = Pin::as_mut(&mut self.0);
+ // This unsafe allows us to mutably access the iterator field
+ let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() };
+ iter_next_row(next)
+ }
+}
+
+// ---- iterators within transactions ----
+// it's the same except we don't hold a mutex guard,
+// only a Statement and a Rows object
+
+struct TxValueIterator<'a> {
+ stmt: Statement<'a>,
+ iter: Option<Rows<'a>>,
+ _pin: PhantomPinned,
+}
+
+impl<'a> TxValueIterator<'a> {
+ fn make<P: rusqlite::Params>(
+ tx: &'a SqliteTx<'a>,
+ sql: &str,
+ args: P,
+ ) -> TxOpResult<TxValueIter<'a>> {
+ let stmt = tx.tx.prepare(sql)?;
+ let res = TxValueIterator {
+ stmt,
+ iter: None,
+ _pin: PhantomPinned,
};
- Some(Ok((k, v)))
+ let mut boxed = Box::pin(res);
+ trace!("make iterator with sql: {}", sql);
+
+ // This unsafe allows us to bypass lifetime checks
+ let stmt = unsafe { NonNull::from(&boxed.stmt).as_mut() };
+ let iter = stmt.query(args)?;
+
+ let mut_ref = Pin::as_mut(&mut boxed);
+ // This unsafe allows us to write in a field of the pinned struct
+ unsafe {
+ Pin::get_unchecked_mut(mut_ref).iter = Some(iter);
+ }
+
+ Ok(Box::new(TxValueIteratorPin(boxed)))
}
}
-// ----
+impl<'a> Drop for TxValueIterator<'a> {
+ fn drop(&mut self) {
+ trace!("drop iter");
+ drop(self.iter.take());
+ }
+}
+
+struct TxValueIteratorPin<'a>(Pin<Box<TxValueIterator<'a>>>);
+
+impl<'a> Iterator for TxValueIteratorPin<'a> {
+ type Item = TxOpResult<(Value, Value)>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let mut_ref = Pin::as_mut(&mut self.0);
+ // This unsafe allows us to mutably access the iterator field
+ let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() };
+ iter_next_row(next)
+ }
+}
+
+// ---- utility ----
fn bounds_sql<'r>(low: Bound<&'r [u8]>, high: Bound<&'r [u8]>) -> (String, Vec<Vec<u8>>) {
let mut sql = String::new();
@@ -510,3 +588,25 @@ fn bounds_sql<'r>(low: Bound<&'r [u8]>, high: Bound<&'r [u8]>) -> (String, Vec<V
(sql, params)
}
+
+fn iter_next_row<E>(
+ next_row: rusqlite::Result<Option<&rusqlite::Row>>,
+) -> Option<std::result::Result<(Value, Value), E>>
+where
+ E: From<rusqlite::Error>,
+{
+ let row = match next_row {
+ Err(e) => return Some(Err(e.into())),
+ Ok(None) => return None,
+ Ok(Some(r)) => r,
+ };
+ let k = match row.get::<_, Vec<u8>>(0) {
+ Err(e) => return Some(Err(e.into())),
+ Ok(x) => x,
+ };
+ let v = match row.get::<_, Vec<u8>>(1) {
+ Err(e) => return Some(Err(e.into())),
+ Ok(y) => y,
+ };
+ Some(Ok((k, v)))
+}
diff --git a/src/db/test.rs b/src/db/test.rs
index cd99eafa..3add89fb 100644
--- a/src/db/test.rs
+++ b/src/db/test.rs
@@ -10,8 +10,13 @@ fn test_suite(db: Db) {
let vb: &[u8] = &b"plip"[..];
let vc: &[u8] = &b"plup"[..];
+ // ---- test simple insert/delete ----
+
assert!(tree.insert(ka, va).unwrap().is_none());
assert_eq!(tree.get(ka).unwrap().unwrap(), va);
+ assert_eq!(tree.len().unwrap(), 1);
+
+ // ---- test transaction logic ----
let res = db.transaction::<_, (), _>(|tx| {
assert_eq!(tx.get(&tree, ka).unwrap().unwrap(), va);
@@ -37,6 +42,8 @@ fn test_suite(db: Db) {
assert!(matches!(res, Err(TxError::Abort(42))));
assert_eq!(tree.get(ka).unwrap().unwrap(), vb);
+ // ---- test iteration outside of transactions ----
+
let mut iter = tree.iter().unwrap();
let next = iter.next().unwrap().unwrap();
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
@@ -73,6 +80,48 @@ fn test_suite(db: Db) {
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
assert!(iter.next().is_none());
drop(iter);
+
+ // ---- test iteration within transactions ----
+
+ db.transaction::<_, (), _>(|tx| {
+ let mut iter = tx.iter(&tree).unwrap();
+ let next = iter.next().unwrap().unwrap();
+ assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
+ let next = iter.next().unwrap().unwrap();
+ assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc));
+ assert!(iter.next().is_none());
+ Ok(())
+ })
+ .unwrap();
+
+ db.transaction::<_, (), _>(|tx| {
+ let mut iter = tx.range(&tree, kint..).unwrap();
+ let next = iter.next().unwrap().unwrap();
+ assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc));
+ assert!(iter.next().is_none());
+ Ok(())
+ })
+ .unwrap();
+
+ db.transaction::<_, (), _>(|tx| {
+ let mut iter = tx.range_rev(&tree, ..kint).unwrap();
+ let next = iter.next().unwrap().unwrap();
+ assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
+ assert!(iter.next().is_none());
+ Ok(())
+ })
+ .unwrap();
+
+ db.transaction::<_, (), _>(|tx| {
+ let mut iter = tx.iter_rev(&tree).unwrap();
+ let next = iter.next().unwrap().unwrap();
+ assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc));
+ let next = iter.next().unwrap().unwrap();
+ assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
+ assert!(iter.next().is_none());
+ Ok(())
+ })
+ .unwrap();
}
#[test]
@@ -91,17 +140,6 @@ fn test_lmdb_db() {
}
#[test]
-#[cfg(feature = "sled")]
-fn test_sled_db() {
- use crate::sled_adapter::SledDb;
-
- let path = mktemp::Temp::new_dir().unwrap();
- let db = SledDb::init(sled::open(path.to_path_buf()).unwrap());
- test_suite(db);
- drop(path);
-}
-
-#[test]
#[cfg(feature = "sqlite")]
fn test_sqlite_db() {
use crate::sqlite_adapter::SqliteDb;
diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml
index 00ecb35e..53449a1c 100644
--- a/src/garage/Cargo.toml
+++ b/src/garage/Cargo.toml
@@ -80,12 +80,11 @@ k2v-client.workspace = true
[features]
-default = [ "bundled-libs", "metrics", "sled", "lmdb", "sqlite", "k2v" ]
+default = [ "bundled-libs", "metrics", "lmdb", "sqlite", "k2v" ]
k2v = [ "garage_util/k2v", "garage_api/k2v" ]
-# Database engines, Sled is still our default even though we don't like it
-sled = [ "garage_model/sled" ]
+# Database engines
lmdb = [ "garage_model/lmdb" ]
sqlite = [ "garage_model/sqlite" ]
diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs
index 073693ed..300e1211 100644
--- a/src/garage/admin/mod.rs
+++ b/src/garage/admin/mod.rs
@@ -197,11 +197,11 @@ impl AdminRpcHandler {
// Gather table statistics
let mut table = vec![" Table\tItems\tMklItems\tMklTodo\tGcTodo".into()];
- table.push(self.gather_table_stats(&self.garage.bucket_table, opt.detailed)?);
- table.push(self.gather_table_stats(&self.garage.key_table, opt.detailed)?);
- table.push(self.gather_table_stats(&self.garage.object_table, opt.detailed)?);
- table.push(self.gather_table_stats(&self.garage.version_table, opt.detailed)?);
- table.push(self.gather_table_stats(&self.garage.block_ref_table, opt.detailed)?);
+ table.push(self.gather_table_stats(&self.garage.bucket_table)?);
+ table.push(self.gather_table_stats(&self.garage.key_table)?);
+ table.push(self.gather_table_stats(&self.garage.object_table)?);
+ table.push(self.gather_table_stats(&self.garage.version_table)?);
+ table.push(self.gather_table_stats(&self.garage.block_ref_table)?);
write!(
&mut ret,
"\nTable stats:\n{}",
@@ -211,15 +211,7 @@ impl AdminRpcHandler {
// Gather block manager statistics
writeln!(&mut ret, "\nBlock manager stats:").unwrap();
- let rc_len = if opt.detailed {
- self.garage.block_manager.rc_len()?.to_string()
- } else {
- self.garage
- .block_manager
- .rc_fast_len()?
- .map(|x| x.to_string())
- .unwrap_or_else(|| "NC".into())
- };
+ let rc_len = self.garage.block_manager.rc_len()?.to_string();
writeln!(
&mut ret,
@@ -240,10 +232,6 @@ impl AdminRpcHandler {
)
.unwrap();
- if !opt.detailed {
- writeln!(&mut ret, "\nIf values are missing above (marked as NC), consider adding the --detailed flag (this will be slow).").unwrap();
- }
-
if !opt.skip_global {
write!(&mut ret, "\n{}", self.gather_cluster_stats()).unwrap();
}
@@ -345,34 +333,13 @@ impl AdminRpcHandler {
ret
}
- fn gather_table_stats<F, R>(
- &self,
- t: &Arc<Table<F, R>>,
- detailed: bool,
- ) -> Result<String, Error>
+ fn gather_table_stats<F, R>(&self, t: &Arc<Table<F, R>>) -> Result<String, Error>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
- let (data_len, mkl_len) = if detailed {
- (
- t.data.store.len().map_err(GarageError::from)?.to_string(),
- t.merkle_updater.merkle_tree_len()?.to_string(),
- )
- } else {
- (
- t.data
- .store
- .fast_len()
- .map_err(GarageError::from)?
- .map(|x| x.to_string())
- .unwrap_or_else(|| "NC".into()),
- t.merkle_updater
- .merkle_tree_fast_len()?
- .map(|x| x.to_string())
- .unwrap_or_else(|| "NC".into()),
- )
- };
+ let data_len = t.data.store.len().map_err(GarageError::from)?.to_string();
+ let mkl_len = t.merkle_updater.merkle_tree_len()?.to_string();
Ok(format!(
" {}\t{}\t{}\t{}\t{}",
diff --git a/src/garage/cli/convert_db.rs b/src/garage/cli/convert_db.rs
index 2aadb1d6..5346d55a 100644
--- a/src/garage/cli/convert_db.rs
+++ b/src/garage/cli/convert_db.rs
@@ -11,7 +11,7 @@ pub struct ConvertDbOpt {
/// https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0)
#[structopt(short = "i")]
input_path: PathBuf,
- /// Input database engine (sled, lmdb or sqlite; limited by db engines
+ /// Input database engine (lmdb or sqlite; limited by db engines
/// enabled in this build)
#[structopt(short = "a")]
input_engine: Engine,
diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs
index 63014dbc..a5e2e6e8 100644
--- a/src/garage/cli/structs.rs
+++ b/src/garage/cli/structs.rs
@@ -531,10 +531,6 @@ pub struct StatsOpt {
#[structopt(short = "a", long = "all-nodes")]
pub all_nodes: bool,
- /// Gather detailed statistics (this can be long)
- #[structopt(short = "d", long = "detailed")]
- pub detailed: bool,
-
/// Don't show global cluster stats (internal use in RPC)
#[structopt(skip)]
#[serde(default)]
diff --git a/src/garage/main.rs b/src/garage/main.rs
index e489fff0..5e9c061f 100644
--- a/src/garage/main.rs
+++ b/src/garage/main.rs
@@ -18,8 +18,8 @@ compile_error!("Either bundled-libs or system-libs Cargo feature must be enabled
#[cfg(all(feature = "bundled-libs", feature = "system-libs"))]
compile_error!("Only one of bundled-libs and system-libs Cargo features must be enabled");
-#[cfg(not(any(feature = "lmdb", feature = "sled", feature = "sqlite")))]
-compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb, sled or sqlite.");
+#[cfg(not(any(feature = "lmdb", feature = "sqlite")))]
+compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb or sqlite.");
use std::net::SocketAddr;
use std::path::PathBuf;
@@ -72,8 +72,6 @@ async fn main() {
let features = &[
#[cfg(feature = "k2v")]
"k2v",
- #[cfg(feature = "sled")]
- "sled",
#[cfg(feature = "lmdb")]
"lmdb",
#[cfg(feature = "sqlite")]
diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml
index 776671d0..a6bcfbe7 100644
--- a/src/model/Cargo.toml
+++ b/src/model/Cargo.toml
@@ -42,8 +42,7 @@ tokio.workspace = true
opentelemetry.workspace = true
[features]
-default = [ "sled", "lmdb", "sqlite" ]
+default = [ "lmdb", "sqlite" ]
k2v = [ "garage_util/k2v" ]
lmdb = [ "garage_db/lmdb" ]
-sled = [ "garage_db/sled" ]
sqlite = [ "garage_db/sqlite" ]
diff --git a/src/model/garage.rs b/src/model/garage.rs
index 7ec8b22e..8987c594 100644
--- a/src/model/garage.rs
+++ b/src/model/garage.rs
@@ -118,9 +118,6 @@ impl Garage {
.ok_or_message("Invalid `db_engine` value in configuration file")?;
let mut db_path = config.metadata_dir.clone();
match db_engine {
- db::Engine::Sled => {
- db_path.push("db");
- }
db::Engine::Sqlite => {
db_path.push("db.sqlite");
}
@@ -134,8 +131,6 @@ impl Garage {
v if v == usize::default() => None,
v => Some(v),
},
- sled_cache_capacity: config.sled_cache_capacity,
- sled_flush_every_ms: config.sled_flush_every_ms,
};
let db = db::open_db(&db_path, db_engine, &db_opt)
.ok_or_message("Unable to open metadata db")?;
diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs
index 50d4283f..9ecf168c 100644
--- a/src/model/s3/lifecycle_worker.rs
+++ b/src/model/s3/lifecycle_worker.rs
@@ -121,13 +121,7 @@ impl Worker for LifecycleWorker {
mpu_aborted,
..
} => {
- let n_objects = self
- .garage
- .object_table
- .data
- .store
- .fast_len()
- .unwrap_or(None);
+ let n_objects = self.garage.object_table.data.store.len().ok();
let progress = match n_objects {
None => "...".to_string(),
Some(total) => format!(
diff --git a/src/table/data.rs b/src/table/data.rs
index 7f6b7847..09f4e008 100644
--- a/src/table/data.rs
+++ b/src/table/data.rs
@@ -6,7 +6,6 @@ use serde_bytes::ByteBuf;
use tokio::sync::Notify;
use garage_db as db;
-use garage_db::counted_tree_hack::CountedTree;
use garage_util::data::*;
use garage_util::error::*;
@@ -36,7 +35,7 @@ pub struct TableData<F: TableSchema, R: TableReplication> {
pub(crate) insert_queue: db::Tree,
pub(crate) insert_queue_notify: Arc<Notify>,
- pub(crate) gc_todo: CountedTree,
+ pub(crate) gc_todo: db::Tree,
pub(crate) metrics: TableMetrics,
}
@@ -61,7 +60,6 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
let gc_todo = db
.open_tree(format!("{}:gc_todo_v2", F::TABLE_NAME))
.expect("Unable to open GC DB tree");
- let gc_todo = CountedTree::new(gc_todo).expect("Cannot count gc_todo_v2");
let metrics = TableMetrics::new(
F::TABLE_NAME,
@@ -370,6 +368,6 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
}
pub fn gc_todo_len(&self) -> Result<usize, Error> {
- Ok(self.gc_todo.len())
+ Ok(self.gc_todo.len()?)
}
}
diff --git a/src/table/gc.rs b/src/table/gc.rs
index ef788749..d30a1849 100644
--- a/src/table/gc.rs
+++ b/src/table/gc.rs
@@ -10,7 +10,7 @@ use serde_bytes::ByteBuf;
use futures::future::join_all;
use tokio::sync::watch;
-use garage_db::counted_tree_hack::CountedTree;
+use garage_db as db;
use garage_util::background::*;
use garage_util::data::*;
@@ -334,9 +334,9 @@ impl<F: TableSchema, R: TableReplication> Worker for GcWorker<F, R> {
}
}
-/// An entry stored in the gc_todo Sled tree associated with the table
+/// An entry stored in the gc_todo db tree associated with the table
/// Contains helper function for parsing, saving, and removing
-/// such entry in Sled
+/// such entry in the db
///
/// Format of an entry:
/// - key = 8 bytes: timestamp of tombstone
@@ -353,7 +353,7 @@ pub(crate) struct GcTodoEntry {
}
impl GcTodoEntry {
- /// Creates a new GcTodoEntry (not saved in Sled) from its components:
+ /// Creates a new GcTodoEntry (not saved in the db) from its components:
/// the key of an entry in the table, and the hash of the associated
/// serialized value
pub(crate) fn new(key: Vec<u8>, value_hash: Hash) -> Self {
@@ -376,7 +376,7 @@ impl GcTodoEntry {
}
/// Saves the GcTodoEntry in the gc_todo tree
- pub(crate) fn save(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> {
+ pub(crate) fn save(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> {
gc_todo_tree.insert(self.todo_table_key(), self.value_hash.as_slice())?;
Ok(())
}
@@ -386,12 +386,14 @@ impl GcTodoEntry {
/// This is useful to remove a todo entry only under the condition
/// that it has not changed since the time it was read, i.e.
/// what we have to do is still the same
- pub(crate) fn remove_if_equal(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> {
- gc_todo_tree.compare_and_swap::<_, _, &[u8]>(
- &self.todo_table_key(),
- Some(self.value_hash),
- None,
- )?;
+ pub(crate) fn remove_if_equal(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> {
+ gc_todo_tree.db().transaction(|txn| {
+ let key = self.todo_table_key();
+ if txn.get(gc_todo_tree, &key)?.as_deref() == Some(self.value_hash.as_slice()) {
+ txn.remove(gc_todo_tree, &key)?;
+ }
+ Ok(())
+ })?;
Ok(())
}
diff --git a/src/table/merkle.rs b/src/table/merkle.rs
index 01271c58..596d5805 100644
--- a/src/table/merkle.rs
+++ b/src/table/merkle.rs
@@ -31,14 +31,14 @@ pub struct MerkleUpdater<F: TableSchema, R: TableReplication> {
// - value = the hash of the full serialized item, if present,
// or an empty vec if item is absent (deleted)
// Fields in data:
- // pub(crate) merkle_todo: sled::Tree,
+ // pub(crate) merkle_todo: db::Tree,
// pub(crate) merkle_todo_notify: Notify,
// Content of the merkle tree: items where
// - key = .bytes() for MerkleNodeKey
// - value = serialization of a MerkleNode, assumed to be MerkleNode::empty if not found
// Field in data:
- // pub(crate) merkle_tree: sled::Tree,
+ // pub(crate) merkle_tree: db::Tree,
empty_node_hash: Hash,
}
@@ -291,10 +291,6 @@ impl<F: TableSchema, R: TableReplication> MerkleUpdater<F, R> {
Ok(self.data.merkle_tree.len()?)
}
- pub fn merkle_tree_fast_len(&self) -> Result<Option<usize>, Error> {
- Ok(self.data.merkle_tree.fast_len()?)
- }
-
pub fn todo_len(&self) -> Result<usize, Error> {
Ok(self.data.merkle_todo.len()?)
}
diff --git a/src/table/metrics.rs b/src/table/metrics.rs
index 8318a84f..7bb0959a 100644
--- a/src/table/metrics.rs
+++ b/src/table/metrics.rs
@@ -1,7 +1,6 @@
use opentelemetry::{global, metrics::*, KeyValue};
use garage_db as db;
-use garage_db::counted_tree_hack::CountedTree;
/// TableMetrics reference all counter used for metrics
pub struct TableMetrics {
@@ -27,7 +26,7 @@ impl TableMetrics {
store: db::Tree,
merkle_tree: db::Tree,
merkle_todo: db::Tree,
- gc_todo: CountedTree,
+ gc_todo: db::Tree,
) -> Self {
let meter = global::meter(table_name);
TableMetrics {
@@ -35,9 +34,9 @@ impl TableMetrics {
.u64_value_observer(
"table.size",
move |observer| {
- if let Ok(Some(v)) = store.fast_len() {
+ if let Ok(value) = store.len() {
observer.observe(
- v as u64,
+ value as u64,
&[KeyValue::new("table_name", table_name)],
);
}
@@ -49,9 +48,9 @@ impl TableMetrics {
.u64_value_observer(
"table.merkle_tree_size",
move |observer| {
- if let Ok(Some(v)) = merkle_tree.fast_len() {
+ if let Ok(value) = merkle_tree.len() {
observer.observe(
- v as u64,
+ value as u64,
&[KeyValue::new("table_name", table_name)],
);
}
@@ -77,10 +76,12 @@ impl TableMetrics {
.u64_value_observer(
"table.gc_todo_queue_length",
move |observer| {
- observer.observe(
- gc_todo.len() as u64,
- &[KeyValue::new("table_name", table_name)],
- );
+ if let Ok(value) = gc_todo.len() {
+ observer.observe(
+ value as u64,
+ &[KeyValue::new("table_name", table_name)],
+ );
+ }
},
)
.with_description("Table garbage collector TODO queue length")
diff --git a/src/util/config.rs b/src/util/config.rs
index b7f27676..e243c813 100644
--- a/src/util/config.rs
+++ b/src/util/config.rs
@@ -87,20 +87,10 @@ pub struct Config {
pub kubernetes_discovery: Option<KubernetesDiscoveryConfig>,
// -- DB
- /// Database engine to use for metadata (options: sled, sqlite, lmdb)
+ /// Database engine to use for metadata (options: sqlite, lmdb)
#[serde(default = "default_db_engine")]
pub db_engine: String,
- /// Sled cache size, in bytes
- #[serde(
- deserialize_with = "deserialize_capacity",
- default = "default_sled_cache_capacity"
- )]
- pub sled_cache_capacity: usize,
- /// Sled flush interval in milliseconds
- #[serde(default = "default_sled_flush_every_ms")]
- pub sled_flush_every_ms: u64,
-
/// LMDB map size
#[serde(deserialize_with = "deserialize_capacity", default)]
pub lmdb_map_size: usize,
@@ -246,13 +236,6 @@ fn default_db_engine() -> String {
"lmdb".into()
}
-fn default_sled_cache_capacity() -> usize {
- 128 * 1024 * 1024
-}
-fn default_sled_flush_every_ms() -> u64 {
- 2000
-}
-
fn default_block_size() -> usize {
1048576
}