aboutsummaryrefslogtreecommitdiff
path: root/src/util
diff options
context:
space:
mode:
authorAlex <alex@adnab.me>2022-06-08 10:01:44 +0200
committerAlex <alex@adnab.me>2022-06-08 10:01:44 +0200
commitb44d3fc796484a50cd6854f20c9b46e5fddedc9d (patch)
tree29f6da0e8dc68485edf713aaa7331536f4ff4fde /src/util
parent7eed3ceda9cf964e3435f22fc1852e27f4f5a8ae (diff)
downloadgarage-b44d3fc796484a50cd6854f20c9b46e5fddedc9d.tar.gz
garage-b44d3fc796484a50cd6854f20c9b46e5fddedc9d.zip
Abstract database behind generic interface and implement alternative drivers (#322)
- [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY careful not to do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adapter uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. 
For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
Diffstat (limited to 'src/util')
-rw-r--r--src/util/Cargo.toml4
-rw-r--r--src/util/config.rs11
-rw-r--r--src/util/error.rs12
-rw-r--r--src/util/lib.rs2
-rw-r--r--src/util/sled_counter.rs100
5 files changed, 19 insertions, 110 deletions
diff --git a/src/util/Cargo.toml b/src/util/Cargo.toml
index 95cde531..5d073436 100644
--- a/src/util/Cargo.toml
+++ b/src/util/Cargo.toml
@@ -14,6 +14,8 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
+garage_db = { version = "0.8.0", path = "../db" }
+
blake2 = "0.9"
err-derive = "0.3"
xxhash-rust = { version = "0.8", default-features = false, features = ["xxh3"] }
@@ -22,8 +24,6 @@ tracing = "0.1.30"
rand = "0.8"
sha2 = "0.9"
-sled = "0.34"
-
chrono = "0.4"
rmp-serde = "0.15"
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
diff --git a/src/util/config.rs b/src/util/config.rs
index 99ebce31..e8ef4fdd 100644
--- a/src/util/config.rs
+++ b/src/util/config.rs
@@ -64,14 +64,19 @@ pub struct Config {
#[serde(default)]
pub kubernetes_skip_crd: bool,
+ // -- DB
+ /// Database engine to use for metadata (options: sled, sqlite, lmdb)
+ #[serde(default = "default_db_engine")]
+ pub db_engine: String,
+
/// Sled cache size, in bytes
#[serde(default = "default_sled_cache_capacity")]
pub sled_cache_capacity: u64,
-
/// Sled flush interval in milliseconds
#[serde(default = "default_sled_flush_every_ms")]
pub sled_flush_every_ms: u64,
+ // -- APIs
/// Configuration for S3 api
pub s3_api: S3ApiConfig,
@@ -129,6 +134,10 @@ pub struct AdminConfig {
pub trace_sink: Option<String>,
}
+fn default_db_engine() -> String {
+ "sled".into()
+}
+
fn default_sled_cache_capacity() -> u64 {
128 * 1024 * 1024
}
diff --git a/src/util/error.rs b/src/util/error.rs
index 8734a0c8..9995c746 100644
--- a/src/util/error.rs
+++ b/src/util/error.rs
@@ -26,8 +26,8 @@ pub enum Error {
#[error(display = "Netapp error: {}", _0)]
Netapp(#[error(source)] netapp::error::Error),
- #[error(display = "Sled error: {}", _0)]
- Sled(#[error(source)] sled::Error),
+ #[error(display = "DB error: {}", _0)]
+ Db(#[error(source)] garage_db::Error),
#[error(display = "Messagepack encode error: {}", _0)]
RmpEncode(#[error(source)] rmp_serde::encode::Error),
@@ -78,11 +78,11 @@ impl Error {
}
}
-impl From<sled::transaction::TransactionError<Error>> for Error {
- fn from(e: sled::transaction::TransactionError<Error>) -> Error {
+impl From<garage_db::TxError<Error>> for Error {
+ fn from(e: garage_db::TxError<Error>) -> Error {
match e {
- sled::transaction::TransactionError::Abort(x) => x,
- sled::transaction::TransactionError::Storage(x) => Error::Sled(x),
+ garage_db::TxError::Abort(x) => x,
+ garage_db::TxError::Db(x) => Error::Db(x),
}
}
}
diff --git a/src/util/lib.rs b/src/util/lib.rs
index d8ffdd0b..8ca6e310 100644
--- a/src/util/lib.rs
+++ b/src/util/lib.rs
@@ -11,7 +11,7 @@ pub mod error;
pub mod formater;
pub mod metrics;
pub mod persister;
-pub mod sled_counter;
+//pub mod sled_counter;
pub mod time;
pub mod token_bucket;
pub mod tranquilizer;
diff --git a/src/util/sled_counter.rs b/src/util/sled_counter.rs
deleted file mode 100644
index bc54cea0..00000000
--- a/src/util/sled_counter.rs
+++ /dev/null
@@ -1,100 +0,0 @@
-use std::sync::{
- atomic::{AtomicUsize, Ordering},
- Arc,
-};
-
-use sled::{CompareAndSwapError, IVec, Iter, Result, Tree};
-
-#[derive(Clone)]
-pub struct SledCountedTree(Arc<SledCountedTreeInternal>);
-
-struct SledCountedTreeInternal {
- tree: Tree,
- len: AtomicUsize,
-}
-
-impl SledCountedTree {
- pub fn new(tree: Tree) -> Self {
- let len = tree.len();
- Self(Arc::new(SledCountedTreeInternal {
- tree,
- len: AtomicUsize::new(len),
- }))
- }
-
- pub fn len(&self) -> usize {
- self.0.len.load(Ordering::Relaxed)
- }
-
- pub fn is_empty(&self) -> bool {
- self.0.tree.is_empty()
- }
-
- pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<IVec>> {
- self.0.tree.get(key)
- }
-
- pub fn iter(&self) -> Iter {
- self.0.tree.iter()
- }
-
- // ---- writing functions ----
-
- pub fn insert<K, V>(&self, key: K, value: V) -> Result<Option<IVec>>
- where
- K: AsRef<[u8]>,
- V: Into<IVec>,
- {
- let res = self.0.tree.insert(key, value);
- if res == Ok(None) {
- self.0.len.fetch_add(1, Ordering::Relaxed);
- }
- res
- }
-
- pub fn remove<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<IVec>> {
- let res = self.0.tree.remove(key);
- if matches!(res, Ok(Some(_))) {
- self.0.len.fetch_sub(1, Ordering::Relaxed);
- }
- res
- }
-
- pub fn pop_min(&self) -> Result<Option<(IVec, IVec)>> {
- let res = self.0.tree.pop_min();
- if let Ok(Some(_)) = &res {
- self.0.len.fetch_sub(1, Ordering::Relaxed);
- };
- res
- }
-
- pub fn compare_and_swap<K, OV, NV>(
- &self,
- key: K,
- old: Option<OV>,
- new: Option<NV>,
- ) -> Result<std::result::Result<(), CompareAndSwapError>>
- where
- K: AsRef<[u8]>,
- OV: AsRef<[u8]>,
- NV: Into<IVec>,
- {
- let old_some = old.is_some();
- let new_some = new.is_some();
-
- let res = self.0.tree.compare_and_swap(key, old, new);
-
- if res == Ok(Ok(())) {
- match (old_some, new_some) {
- (false, true) => {
- self.0.len.fetch_add(1, Ordering::Relaxed);
- }
- (true, false) => {
- self.0.len.fetch_sub(1, Ordering::Relaxed);
- }
- _ => (),
- }
- }
- res
- }
-}