diff options
author | Alex <alex@adnab.me> | 2022-06-08 10:01:44 +0200 |
---|---|---|
committer | Alex <alex@adnab.me> | 2022-06-08 10:01:44 +0200 |
commit | b44d3fc796484a50cd6854f20c9b46e5fddedc9d (patch) | |
tree | 29f6da0e8dc68485edf713aaa7331536f4ff4fde /src/db/counted_tree_hack.rs | |
parent | 7eed3ceda9cf964e3435f22fc1852e27f4f5a8ae (diff) | |
download | garage-b44d3fc796484a50cd6854f20c9b46e5fddedc9d.tar.gz garage-b44d3fc796484a50cd6854f20c9b46e5fddedc9d.zip |
Abstract database behind generic interface and implement alternative drivers (#322)
- [x] Design interface
- [x] Implement Sled backend
- [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction)
- [x] Convert Garage code to use generic interface
- [x] Proof-read converted Garage code
- [ ] Test everything well
- [x] Implement sqlite backend
- [x] Implement LMDB backend
- [ ] (Implement Persy backend?)
- [ ] (Implement other backends? (like RocksDB, ...))
- [x] Implement backend choice in config file and garage server module
- [x] Add CLI for converting between DB formats
- Exploit the new interface to put more things in transactions
- [x] `.updated()` trigger on Garage tables
Fix #284
**Bugs**
- [x] When exporting sqlite, trees iterate empty??
- [x] LMDB doesn't work
**Known issues for various back-ends**
- Sled:
- Eats all my RAM and also all my disk space
- `.len()` has to traverse the whole table
- Is actually quite slow on some operations
- And is actually pretty bad code...
- Sqlite:
- Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY carefull to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason.
- (adapter uses a bunch of unsafe code)
- Heed (LMDB):
- Not suited for 32-bit machines as it has to map the whole DB in memory.
- (adpater uses a tiny bit of unsafe code)
**My recommendation:** avoid 32-bit machines and use LMDB as much as possible.
**Converting databases** is actually quite easy. For example from Sled to LMDB:
```bash
cd src/db
cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb
```
Then, just add this to your `config.toml`:
```toml
db_engine = "lmdb"
```
Co-authored-by: Alex Auvolat <alex@adnab.me>
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322
Co-authored-by: Alex <alex@adnab.me>
Co-committed-by: Alex <alex@adnab.me>
Diffstat (limited to 'src/db/counted_tree_hack.rs')
-rw-r--r-- | src/db/counted_tree_hack.rs | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/src/db/counted_tree_hack.rs b/src/db/counted_tree_hack.rs new file mode 100644 index 00000000..bbe943a2 --- /dev/null +++ b/src/db/counted_tree_hack.rs @@ -0,0 +1,127 @@ +//! This hack allows a db tree to keep in RAM a counter of the number of entries +//! it contains, which is used to call .len() on it. This is usefull only for +//! the sled backend where .len() otherwise would have to traverse the whole +//! tree to count items. For sqlite and lmdb, this is mostly useless (but +//! hopefully not harmfull!). Note that a CountedTree cannot be part of a +//! transaction. + +use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, +}; + +use crate::{Result, Tree, TxError, Value, ValueIter}; + +#[derive(Clone)] +pub struct CountedTree(Arc<CountedTreeInternal>); + +struct CountedTreeInternal { + tree: Tree, + len: AtomicUsize, +} + +impl CountedTree { + pub fn new(tree: Tree) -> Result<Self> { + let len = tree.len()?; + Ok(Self(Arc::new(CountedTreeInternal { + tree, + len: AtomicUsize::new(len), + }))) + } + + pub fn len(&self) -> usize { + self.0.len.load(Ordering::SeqCst) + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> { + self.0.tree.get(key) + } + + pub fn first(&self) -> Result<Option<(Value, Value)>> { + self.0.tree.first() + } + + pub fn iter(&self) -> Result<ValueIter<'_>> { + self.0.tree.iter() + } + + // ---- writing functions ---- + + pub fn insert<K, V>(&self, key: K, value: V) -> Result<Option<Value>> + where + K: AsRef<[u8]>, + V: AsRef<[u8]>, + { + let old_val = self.0.tree.insert(key, value)?; + if old_val.is_none() { + self.0.len.fetch_add(1, Ordering::SeqCst); + } + Ok(old_val) + } + + pub fn remove<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> { + let old_val = self.0.tree.remove(key)?; + if old_val.is_some() { + self.0.len.fetch_sub(1, Ordering::SeqCst); + } + Ok(old_val) + } + + pub fn compare_and_swap<K, OV, NV>( + &self, + key: K, + expected_old: Option<OV>, + new: Option<NV>, + ) -> Result<bool> + where + K: AsRef<[u8]>, + OV: AsRef<[u8]>, + NV: AsRef<[u8]>, + { + let old_some = expected_old.is_some(); + let new_some = new.is_some(); + + let tx_res = self.0.tree.db().transaction(|mut tx| { + let old_val = tx.get(&self.0.tree, &key)?; + let is_same = match (&old_val, &expected_old) { + (None, None) => true, + (Some(x), Some(y)) if x == y.as_ref() => true, + _ => false, + }; + if is_same { + match &new { + Some(v) => { + tx.insert(&self.0.tree, &key, v)?; + } + None => { + tx.remove(&self.0.tree, &key)?; + } + } + tx.commit(()) + } else { + tx.abort(()) + } + }); + + match tx_res { + Ok(()) => { + match (old_some, new_some) { + (false, true) => { + self.0.len.fetch_add(1, Ordering::SeqCst); + } + (true, false) => { + self.0.len.fetch_sub(1, Ordering::SeqCst); + } + _ => (), + } + Ok(true) + } + Err(TxError::Abort(())) => Ok(false), + Err(TxError::Db(e)) => Err(e), + } + } +} |