aboutsummaryrefslogtreecommitdiff
path: root/src/db/counted_tree_hack.rs
diff options
context:
space:
mode:
authorAlex <alex@adnab.me>2022-06-08 10:01:44 +0200
committerAlex <alex@adnab.me>2022-06-08 10:01:44 +0200
commitb44d3fc796484a50cd6854f20c9b46e5fddedc9d (patch)
tree29f6da0e8dc68485edf713aaa7331536f4ff4fde /src/db/counted_tree_hack.rs
parent7eed3ceda9cf964e3435f22fc1852e27f4f5a8ae (diff)
downloadgarage-b44d3fc796484a50cd6854f20c9b46e5fddedc9d.tar.gz
garage-b44d3fc796484a50cd6854f20c9b46e5fddedc9d.zip
Abstract database behind generic interface and implement alternative drivers (#322)
- [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY careful not to do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adapter uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. 
For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
Diffstat (limited to 'src/db/counted_tree_hack.rs')
-rw-r--r--src/db/counted_tree_hack.rs127
1 files changed, 127 insertions, 0 deletions
diff --git a/src/db/counted_tree_hack.rs b/src/db/counted_tree_hack.rs
new file mode 100644
index 00000000..bbe943a2
--- /dev/null
+++ b/src/db/counted_tree_hack.rs
@@ -0,0 +1,127 @@
+//! This hack allows a db tree to keep in RAM a counter of the number of entries
+//! it contains, which is used to answer calls to .len() on it. This is useful
+//! only for the sled backend where .len() otherwise would have to traverse the
+//! whole tree to count items. For sqlite and lmdb, this is mostly useless (but
+//! hopefully not harmful!). Note that a CountedTree cannot be part of a
+//! transaction.
+
+use std::sync::{
+ atomic::{AtomicUsize, Ordering},
+ Arc,
+};
+
+use crate::{Result, Tree, TxError, Value, ValueIter};
+
+/// Handle to a tree paired with an in-RAM count of its entries.
+///
+/// The state lives behind an `Arc`, so cloning a `CountedTree` yields another
+/// handle to the same tree and the same counter.
+#[derive(Clone)]
+pub struct CountedTree(Arc<CountedTreeInternal>);
+
+/// State shared by every clone of a `CountedTree`.
+struct CountedTreeInternal {
+    /// The wrapped database tree; all reads and writes are forwarded to it.
+    tree: Tree,
+    /// Cached entry count, seeded from `tree.len()` at construction and
+    /// adjusted by the writing methods of `CountedTree`.
+    len: AtomicUsize,
+}
+
+impl CountedTree {
+    /// Wraps `tree`, calling `tree.len()` once to seed the in-RAM counter.
+    ///
+    /// Fails with the error of that initial `len()` call, if any.
+    pub fn new(tree: Tree) -> Result<Self> {
+        let initial_len = tree.len()?;
+        let inner = CountedTreeInternal {
+            tree,
+            len: AtomicUsize::new(initial_len),
+        };
+        Ok(Self(Arc::new(inner)))
+    }
+
+    /// Returns the cached entry count; the wrapped tree is not consulted.
+    pub fn len(&self) -> usize {
+        self.counter().load(Ordering::SeqCst)
+    }
+
+    /// Returns `true` when the cached entry count is zero.
+    pub fn is_empty(&self) -> bool {
+        0 == self.len()
+    }
+
+    /// Forwards to `get` on the wrapped tree.
+    pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> {
+        self.inner_tree().get(key)
+    }
+
+    /// Forwards to `first` on the wrapped tree.
+    pub fn first(&self) -> Result<Option<(Value, Value)>> {
+        self.inner_tree().first()
+    }
+
+    /// Forwards to `iter` on the wrapped tree.
+    pub fn iter(&self) -> Result<ValueIter<'_>> {
+        self.inner_tree().iter()
+    }
+
+    // ---- writing functions ----
+
+    /// Inserts `value` under `key` in the wrapped tree and returns what the
+    /// tree's `insert` returned.
+    ///
+    /// A `None` result is taken to mean that the key was not present before,
+    /// in which case the counter is incremented once the tree call has
+    /// succeeded. Errors from the tree are propagated and leave the counter
+    /// untouched.
+    pub fn insert<K, V>(&self, key: K, value: V) -> Result<Option<Value>>
+    where
+        K: AsRef<[u8]>,
+        V: AsRef<[u8]>,
+    {
+        let previous = self.inner_tree().insert(key, value)?;
+        let replaced_existing = previous.is_some();
+        if !replaced_existing {
+            self.counter().fetch_add(1, Ordering::SeqCst);
+        }
+        Ok(previous)
+    }
+
+    /// Removes `key` from the wrapped tree and returns what the tree's
+    /// `remove` returned.
+    ///
+    /// A `Some` result is taken to mean that an entry was actually deleted,
+    /// in which case the counter is decremented once the tree call has
+    /// succeeded. Errors from the tree are propagated and leave the counter
+    /// untouched.
+    pub fn remove<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> {
+        let removed = self.inner_tree().remove(key)?;
+        let deleted_entry = removed.is_some();
+        if deleted_entry {
+            self.counter().fetch_sub(1, Ordering::SeqCst);
+        }
+        Ok(removed)
+    }
+
+    /// Replaces the value stored under `key` only if it currently matches
+    /// `expected_old`.
+    ///
+    /// Inside one transaction on the tree's database, the current value is
+    /// read and compared with `expected_old`: they match when both are
+    /// absent, or when both are present and equal. On a match, `new` is
+    /// applied (`Some` inserts, `None` removes) and the transaction is
+    /// committed; otherwise it is aborted.
+    ///
+    /// Returns `Ok(true)` after a commit, `Ok(false)` after an abort, and
+    /// `Err(e)` when the transaction reports `TxError::Db(e)`.
+    ///
+    /// After a commit the counter is incremented if `expected_old` was `None`
+    /// and `new` was `Some`, decremented in the opposite case, and left
+    /// unchanged otherwise.
+    pub fn compare_and_swap<K, OV, NV>(
+        &self,
+        key: K,
+        expected_old: Option<OV>,
+        new: Option<NV>,
+    ) -> Result<bool>
+    where
+        K: AsRef<[u8]>,
+        OV: AsRef<[u8]>,
+        NV: AsRef<[u8]>,
+    {
+        // A commit implies the stored value matched `expected_old`, so these
+        // two flags describe the entry's presence before and after the swap.
+        let existed_before = expected_old.is_some();
+        let exists_after = new.is_some();
+
+        let tree = self.inner_tree();
+        let outcome = tree.db().transaction(|mut tx| {
+            let current = tx.get(tree, &key)?;
+            let matches_expected = match (&current, &expected_old) {
+                (Some(cur), Some(exp)) => cur == exp.as_ref(),
+                (None, None) => true,
+                _ => false,
+            };
+            if !matches_expected {
+                tx.abort(())
+            } else {
+                if let Some(v) = &new {
+                    tx.insert(tree, &key, v)?;
+                } else {
+                    tx.remove(tree, &key)?;
+                }
+                tx.commit(())
+            }
+        });
+
+        match outcome {
+            Err(TxError::Db(e)) => Err(e),
+            Err(TxError::Abort(())) => Ok(false),
+            Ok(()) => {
+                if exists_after && !existed_before {
+                    self.counter().fetch_add(1, Ordering::SeqCst);
+                } else if existed_before && !exists_after {
+                    self.counter().fetch_sub(1, Ordering::SeqCst);
+                }
+                Ok(true)
+            }
+        }
+    }
+
+    // ---- private helpers ----
+
+    /// Borrows the wrapped tree.
+    fn inner_tree(&self) -> &Tree {
+        &self.0.tree
+    }
+
+    /// Borrows the in-RAM entry counter.
+    fn counter(&self) -> &AtomicUsize {
+        &self.0.len
+    }
+}