From b44d3fc796484a50cd6854f20c9b46e5fddedc9d Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 8 Jun 2022 10:01:44 +0200 Subject: Abstract database behind generic interface and implement alternative drivers (#322) - [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY carefull to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adpater uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex Co-committed-by: Alex --- src/db/lib.rs | 400 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 400 insertions(+) create mode 100644 src/db/lib.rs (limited to 'src/db/lib.rs') diff --git a/src/db/lib.rs b/src/db/lib.rs new file mode 100644 index 00000000..e9d3ea18 --- /dev/null +++ b/src/db/lib.rs @@ -0,0 +1,400 @@ +pub mod lmdb_adapter; +pub mod sled_adapter; +pub mod sqlite_adapter; + +pub mod counted_tree_hack; + +#[cfg(test)] +pub mod test; + +use core::ops::{Bound, RangeBounds}; + +use std::borrow::Cow; +use std::cell::Cell; +use std::sync::Arc; + +use err_derive::Error; + +#[derive(Clone)] +pub struct Db(pub(crate) Arc); + +pub struct Transaction<'a>(&'a mut dyn ITx); + +#[derive(Clone)] +pub struct Tree(Arc, usize); + +pub type Value = Vec; +pub type ValueIter<'a> = Box> + 'a>; +pub type TxValueIter<'a> = Box> + 'a>; + +// ---- + +#[derive(Debug, Error)] +#[error(display = "{}", _0)] +pub struct Error(pub Cow<'static, str>); + +pub type Result = std::result::Result; + +#[derive(Debug, Error)] +#[error(display = "{}", _0)] +pub struct TxOpError(pub(crate) Error); +pub type TxOpResult = std::result::Result; + +pub enum TxError { + Abort(E), + Db(Error), +} +pub type TxResult = std::result::Result>; + +impl From for TxError { + fn from(e: TxOpError) -> TxError { + TxError::Db(e.0) + } +} + +pub fn unabort(res: TxResult) -> TxOpResult> { + match res { + Ok(v) => Ok(Ok(v)), + Err(TxError::Abort(e)) => Ok(Err(e)), + Err(TxError::Db(e)) => Err(TxOpError(e)), + } +} + +// ---- + +impl Db { + pub fn engine(&self) -> String { + self.0.engine() + } + + pub fn open_tree>(&self, name: S) -> Result { + let tree_id = self.0.open_tree(name.as_ref())?; + Ok(Tree(self.0.clone(), tree_id)) + } + + pub fn list_trees(&self) -> Result> { + self.0.list_trees() + } + + pub fn transaction(&self, fun: F) -> TxResult + where + F: Fn(Transaction<'_>) -> TxResult, + { + let f = TxFn { + function: fun, + result: Cell::new(None), + }; + let tx_res = self.0.transaction(&f); + let ret = f + .result + .into_inner() + .expect("Transaction did not store result"); + + match tx_res { + Ok(()) => { + assert!(matches!(ret, Ok(_))); + ret + } + Err(TxError::Abort(())) => { + assert!(matches!(ret, Err(TxError::Abort(_)))); + ret + } + Err(TxError::Db(e2)) => match ret { + // Ok was stored -> the error occured when finalizing + // transaction + Ok(_) => Err(TxError::Db(e2)), + // An error was already stored: that's the one we want to + // return + Err(TxError::Db(e)) => Err(TxError::Db(e)), + _ => unreachable!(), + }, + } + } + + pub fn import(&self, other: &Db) -> Result<()> { + let existing_trees = self.list_trees()?; + if !existing_trees.is_empty() { + return Err(Error( + format!( + "destination database already contains data: {:?}", + existing_trees + ) + .into(), + )); + } + + let tree_names = other.list_trees()?; + for name in tree_names { + let tree = self.open_tree(&name)?; + if tree.len()? > 0 { + return Err(Error(format!("tree {} already contains data", name).into())); + } + + let ex_tree = other.open_tree(&name)?; + + let tx_res = self.transaction(|mut tx| { + let mut i = 0; + for item in ex_tree.iter().map_err(TxError::Abort)? { + let (k, v) = item.map_err(TxError::Abort)?; + tx.insert(&tree, k, v)?; + i += 1; + if i % 1000 == 0 { + println!("{}: imported {}", name, i); + } + } + tx.commit(i) + }); + let total = match tx_res { + Err(TxError::Db(e)) => return Err(e), + Err(TxError::Abort(e)) => return Err(e), + Ok(x) => x, + }; + + println!("{}: finished importing, {} items", name, total); + } + Ok(()) + } +} + +#[allow(clippy::len_without_is_empty)] +impl Tree { + #[inline] + pub fn db(&self) -> Db { + Db(self.0.clone()) + } + + #[inline] + pub fn get>(&self, key: T) -> Result> { + self.0.get(self.1, key.as_ref()) + } + #[inline] + pub fn len(&self) -> Result { + self.0.len(self.1) + } + + #[inline] + pub fn first(&self) -> Result> { + self.iter()?.next().transpose() + } + #[inline] + pub fn get_gt>(&self, from: T) -> Result> { + self.range((Bound::Excluded(from), Bound::Unbounded))? + .next() + .transpose() + } + + /// Returns the old value if there was one + #[inline] + pub fn insert, U: AsRef<[u8]>>( + &self, + key: T, + value: U, + ) -> Result> { + self.0.insert(self.1, key.as_ref(), value.as_ref()) + } + /// Returns the old value if there was one + #[inline] + pub fn remove>(&self, key: T) -> Result> { + self.0.remove(self.1, key.as_ref()) + } + + #[inline] + pub fn iter(&self) -> Result> { + self.0.iter(self.1) + } + #[inline] + pub fn iter_rev(&self) -> Result> { + self.0.iter_rev(self.1) + } + + #[inline] + pub fn range(&self, range: R) -> Result> + where + K: AsRef<[u8]>, + R: RangeBounds, + { + let sb = range.start_bound(); + let eb = range.end_bound(); + self.0.range(self.1, get_bound(sb), get_bound(eb)) + } + #[inline] + pub fn range_rev(&self, range: R) -> Result> + where + K: AsRef<[u8]>, + R: RangeBounds, + { + let sb = range.start_bound(); + let eb = range.end_bound(); + self.0.range_rev(self.1, get_bound(sb), get_bound(eb)) + } +} + +#[allow(clippy::len_without_is_empty)] +impl<'a> Transaction<'a> { + #[inline] + pub fn get>(&self, tree: &Tree, key: T) -> TxOpResult> { + self.0.get(tree.1, key.as_ref()) + } + #[inline] + pub fn len(&self, tree: &Tree) -> TxOpResult { + self.0.len(tree.1) + } + + /// Returns the old value if there was one + #[inline] + pub fn insert, U: AsRef<[u8]>>( + &mut self, + tree: &Tree, + key: T, + value: U, + ) -> TxOpResult> { + self.0.insert(tree.1, key.as_ref(), value.as_ref()) + } + /// Returns the old value if there was one + #[inline] + pub fn remove>(&mut self, tree: &Tree, key: T) -> TxOpResult> { + self.0.remove(tree.1, key.as_ref()) + } + + #[inline] + pub fn iter(&self, tree: &Tree) -> TxOpResult> { + self.0.iter(tree.1) + } + #[inline] + pub fn iter_rev(&self, tree: &Tree) -> TxOpResult> { + self.0.iter_rev(tree.1) + } + + #[inline] + pub fn range(&self, tree: &Tree, range: R) -> TxOpResult> + where + K: AsRef<[u8]>, + R: RangeBounds, + { + let sb = range.start_bound(); + let eb = range.end_bound(); + self.0.range(tree.1, get_bound(sb), get_bound(eb)) + } + #[inline] + pub fn range_rev(&self, tree: &Tree, range: R) -> TxOpResult> + where + K: AsRef<[u8]>, + R: RangeBounds, + { + let sb = range.start_bound(); + let eb = range.end_bound(); + self.0.range_rev(tree.1, get_bound(sb), get_bound(eb)) + } + + // ---- + + #[inline] + pub fn abort(self, e: E) -> TxResult { + Err(TxError::Abort(e)) + } + + #[inline] + pub fn commit(self, r: R) -> TxResult { + Ok(r) + } +} + +// ---- Internal interfaces + +pub(crate) trait IDb: Send + Sync { + fn engine(&self) -> String; + fn open_tree(&self, name: &str) -> Result; + fn list_trees(&self) -> Result>; + + fn get(&self, tree: usize, key: &[u8]) -> Result>; + fn len(&self, tree: usize) -> Result; + + fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result>; + fn remove(&self, tree: usize, key: &[u8]) -> Result>; + + fn iter(&self, tree: usize) -> Result>; + fn iter_rev(&self, tree: usize) -> Result>; + + fn range<'r>( + &self, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, + ) -> Result>; + fn range_rev<'r>( + &self, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, + ) -> Result>; + + fn transaction(&self, f: &dyn ITxFn) -> TxResult<(), ()>; +} + +pub(crate) trait ITx { + fn get(&self, tree: usize, key: &[u8]) -> TxOpResult>; + fn len(&self, tree: usize) -> TxOpResult; + + fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult>; + fn remove(&mut self, tree: usize, key: &[u8]) -> TxOpResult>; + + fn iter(&self, tree: usize) -> TxOpResult>; + fn iter_rev(&self, tree: usize) -> TxOpResult>; + + fn range<'r>( + &self, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, + ) -> TxOpResult>; + fn range_rev<'r>( + &self, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, + ) -> TxOpResult>; +} + +pub(crate) trait ITxFn { + fn try_on(&self, tx: &mut dyn ITx) -> TxFnResult; +} + +pub(crate) enum TxFnResult { + Ok, + Abort, + DbErr, +} + +struct TxFn +where + F: Fn(Transaction<'_>) -> TxResult, +{ + function: F, + result: Cell>>, +} + +impl ITxFn for TxFn +where + F: Fn(Transaction<'_>) -> TxResult, +{ + fn try_on(&self, tx: &mut dyn ITx) -> TxFnResult { + let res = (self.function)(Transaction(tx)); + let res2 = match &res { + Ok(_) => TxFnResult::Ok, + Err(TxError::Abort(_)) => TxFnResult::Abort, + Err(TxError::Db(_)) => TxFnResult::DbErr, + }; + self.result.set(Some(res)); + res2 + } +} + +// ---- + +fn get_bound>(b: Bound<&K>) -> Bound<&[u8]> { + match b { + Bound::Included(v) => Bound::Included(v.as_ref()), + Bound::Excluded(v) => Bound::Excluded(v.as_ref()), + Bound::Unbounded => Bound::Unbounded, + } +} -- cgit v1.2.3 From 77e3fd6db2c9cd3a10889bd071e95ef839cfbefc Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 15 Jun 2022 20:20:28 +0200 Subject: improve internal item counter mechanisms and implement bucket quotas (#326) - [x] Refactoring of internal counting API - [x] Repair procedure for counters (it's an offline procedure!!!) - [x] New counter for objects in buckets - [x] Add quotas to buckets struct - [x] Add CLI to manage bucket quotas - [x] Add admin API to manage bucket quotas - [x] Apply quotas by adding checks on put operations - [x] Proof-read Co-authored-by: Alex Auvolat Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/326 Co-authored-by: Alex Co-committed-by: Alex --- src/db/lib.rs | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/db/lib.rs') diff --git a/src/db/lib.rs b/src/db/lib.rs index e9d3ea18..8188c715 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -197,6 +197,11 @@ impl Tree { pub fn remove>(&self, key: T) -> Result> { self.0.remove(self.1, key.as_ref()) } + /// Clears all values from the tree + #[inline] + pub fn clear(&self) -> Result<()> { + self.0.clear(self.1) + } #[inline] pub fn iter(&self) -> Result> { @@ -311,6 +316,7 @@ pub(crate) trait IDb: Send + Sync { fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result>; fn remove(&self, tree: usize, key: &[u8]) -> Result>; + fn clear(&self, tree: usize) -> Result<()>; fn iter(&self, tree: usize) -> Result>; fn iter_rev(&self, tree: usize) -> Result>; -- cgit v1.2.3 From ac03fa7937d9da29d2358343a499fe9d15ac5f7c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 15 Jul 2022 18:31:19 +0200 Subject: Uniformize tracing::* imports (hopefully fixes 32-bit build) --- src/db/lib.rs | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/db/lib.rs') diff --git a/src/db/lib.rs b/src/db/lib.rs index 8188c715..f185114e 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -1,3 +1,6 @@ +#[macro_use] +extern crate tracing; + pub mod lmdb_adapter; pub mod sled_adapter; pub mod sqlite_adapter; -- cgit v1.2.3 From b886c75450e3ee6a7c2b0a8265d7ada20a4d9d75 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 6 Sep 2022 17:09:43 +0200 Subject: Make all DB engines optional build features --- src/db/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/db/lib.rs') diff --git a/src/db/lib.rs b/src/db/lib.rs index f185114e..5304c195 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -1,8 +1,12 @@ #[macro_use] +#[cfg(feature = "sqlite")] extern crate tracing; +#[cfg(feature = "lmdb")] pub mod lmdb_adapter; +#[cfg(feature = "sled")] pub mod sled_adapter; +#[cfg(feature = "sqlite")] pub mod sqlite_adapter; pub mod counted_tree_hack; -- cgit v1.2.3 From 14492044394a875475b2159d51234ac1e35531bf Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 7 Sep 2022 18:02:13 +0200 Subject: Add warnings when features are not included in build --- src/db/lib.rs | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/db/lib.rs') diff --git a/src/db/lib.rs b/src/db/lib.rs index 5304c195..d96586be 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -2,6 +2,9 @@ #[cfg(feature = "sqlite")] extern crate tracing; +#[cfg(not(any(feature = "lmdb", feature = "sled", feature = "sqlite")))] +compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb, sled or sqlite."); + #[cfg(feature = "lmdb")] pub mod lmdb_adapter; #[cfg(feature = "sled")] -- cgit v1.2.3