aboutsummaryrefslogtreecommitdiff
path: root/src/table/gc.rs
diff options
context:
space:
mode:
authorAlex <alex@adnab.me>2022-06-08 10:01:44 +0200
committerAlex <alex@adnab.me>2022-06-08 10:01:44 +0200
commitb44d3fc796484a50cd6854f20c9b46e5fddedc9d (patch)
tree29f6da0e8dc68485edf713aaa7331536f4ff4fde /src/table/gc.rs
parent7eed3ceda9cf964e3435f22fc1852e27f4f5a8ae (diff)
downloadgarage-b44d3fc796484a50cd6854f20c9b46e5fddedc9d.tar.gz
garage-b44d3fc796484a50cd6854f20c9b46e5fddedc9d.zip
Abstract database behind generic interface and implement alternative drivers (#322)
- [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY carefull to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adpater uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
Diffstat (limited to 'src/table/gc.rs')
-rw-r--r--src/table/gc.rs41
1 files changed, 24 insertions, 17 deletions
diff --git a/src/table/gc.rs b/src/table/gc.rs
index 2a05b6ae..e7fbbcb0 100644
--- a/src/table/gc.rs
+++ b/src/table/gc.rs
@@ -12,9 +12,10 @@ use futures::select;
use futures_util::future::*;
use tokio::sync::watch;
+use garage_db::counted_tree_hack::CountedTree;
+
use garage_util::data::*;
use garage_util::error::*;
-use garage_util::sled_counter::SledCountedTree;
use garage_util::time::*;
use garage_rpc::system::System;
@@ -100,18 +101,16 @@ where
async fn gc_loop_iter(&self) -> Result<Option<Duration>, Error> {
let now = now_msec();
- let mut entries = vec![];
- let mut excluded = vec![];
-
// List entries in the GC todo list
// These entries are put there when a tombstone is inserted in the table
// (see update_entry in data.rs)
- for entry_kv in self.data.gc_todo.iter() {
+ let mut candidates = vec![];
+ for entry_kv in self.data.gc_todo.iter()? {
let (k, vhash) = entry_kv?;
- let mut todo_entry = GcTodoEntry::parse(&k, &vhash);
+ let todo_entry = GcTodoEntry::parse(&k, &vhash);
if todo_entry.deletion_time() > now {
- if entries.is_empty() && excluded.is_empty() {
+ if candidates.is_empty() {
// If the earliest entry in the todo list shouldn't yet be processed,
// return a duration to wait in the loop
return Ok(Some(Duration::from_millis(
@@ -123,15 +122,23 @@ where
}
}
- let vhash = Hash::try_from(&vhash[..]).unwrap();
+ candidates.push(todo_entry);
+ if candidates.len() >= 2 * TABLE_GC_BATCH_SIZE {
+ break;
+ }
+ }
+ let mut entries = vec![];
+ let mut excluded = vec![];
+ for mut todo_entry in candidates {
// Check if the tombstone is still the current value of the entry.
// If not, we don't actually want to GC it, and we will remove it
// from the gc_todo table later (below).
+ let vhash = todo_entry.value_hash;
todo_entry.value = self
.data
.store
- .get(&k[..])?
+ .get(&todo_entry.key[..])?
.filter(|v| blake2sum(&v[..]) == vhash)
.map(|v| v.to_vec());
@@ -353,17 +360,17 @@ impl GcTodoEntry {
}
/// Parses a GcTodoEntry from a (k, v) pair stored in the gc_todo tree
- pub(crate) fn parse(sled_k: &[u8], sled_v: &[u8]) -> Self {
+ pub(crate) fn parse(db_k: &[u8], db_v: &[u8]) -> Self {
Self {
- tombstone_timestamp: u64::from_be_bytes(sled_k[0..8].try_into().unwrap()),
- key: sled_k[8..].to_vec(),
- value_hash: Hash::try_from(sled_v).unwrap(),
+ tombstone_timestamp: u64::from_be_bytes(db_k[0..8].try_into().unwrap()),
+ key: db_k[8..].to_vec(),
+ value_hash: Hash::try_from(db_v).unwrap(),
value: None,
}
}
/// Saves the GcTodoEntry in the gc_todo tree
- pub(crate) fn save(&self, gc_todo_tree: &SledCountedTree) -> Result<(), Error> {
+ pub(crate) fn save(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> {
gc_todo_tree.insert(self.todo_table_key(), self.value_hash.as_slice())?;
Ok(())
}
@@ -373,9 +380,9 @@ impl GcTodoEntry {
/// This is usefull to remove a todo entry only under the condition
/// that it has not changed since the time it was read, i.e.
/// what we have to do is still the same
- pub(crate) fn remove_if_equal(&self, gc_todo_tree: &SledCountedTree) -> Result<(), Error> {
- let _ = gc_todo_tree.compare_and_swap::<_, _, Vec<u8>>(
- &self.todo_table_key()[..],
+ pub(crate) fn remove_if_equal(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> {
+ gc_todo_tree.compare_and_swap::<_, _, &[u8]>(
+ &self.todo_table_key(),
Some(self.value_hash),
None,
)?;