diff options
author | Alex Auvolat <alex@adnab.me> | 2022-06-06 12:02:42 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2022-06-06 12:02:42 +0200 |
commit | 4539a6c2298cfb4578261060e4a5af739a45c99f (patch) | |
tree | ec1520ca3402aab04eef6574cf04e7a443d5d5f6 | |
parent | 4f5d17d46464b09a9e238bf959d14f3127a129cc (diff) | |
download | garage-4539a6c2298cfb4578261060e4a5af739a45c99f.tar.gz garage-4539a6c2298cfb4578261060e4a5af739a45c99f.zip |
Fix more sqlite deadlocks
-rw-r--r-- | src/db/sqlite_adapter.rs | 133 | ||||
-rw-r--r-- | src/table/gc.rs | 20 |
2 files changed, 79 insertions, 74 deletions
diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs index 10e3bfee..e945c31c 100644 --- a/src/db/sqlite_adapter.rs +++ b/src/db/sqlite_adapter.rs @@ -1,9 +1,10 @@ use core::ops::Bound; +use std::borrow::BorrowMut; use std::marker::PhantomPinned; use std::pin::Pin; use std::ptr::NonNull; -use std::sync::{Arc, Mutex, MutexGuard, RwLock}; +use std::sync::{Arc, Mutex, MutexGuard}; use log::trace; @@ -29,24 +30,26 @@ impl<T> From<rusqlite::Error> for TxError<T> { // -- db -pub struct SqliteDb { - db: Mutex<Connection>, - trees: RwLock<Vec<String>>, +pub struct SqliteDb(Mutex<SqliteDbInner>); + +struct SqliteDbInner { + db: Connection, + trees: Vec<String>, } impl SqliteDb { pub fn init(db: rusqlite::Connection) -> Db { - let s = Self { - db: Mutex::new(db), - trees: RwLock::new(Vec::new()), - }; + let s = Self(Mutex::new(SqliteDbInner { + db, + trees: Vec::new(), + })); Db(Arc::new(s)) } +} +impl SqliteDbInner { fn get_tree(&self, i: usize) -> Result<String> { self.trees - .read() - .unwrap() .get(i) .cloned() .ok_or_else(|| Error("invalid tree id".into())) @@ -56,16 +59,13 @@ impl SqliteDb { impl IDb for SqliteDb { fn open_tree(&self, name: &str) -> Result<usize> { let name = format!("tree_{}", name.replace(':', "_COLON_")); + let mut this = self.0.lock().unwrap(); - let mut trees = self.trees.write().unwrap(); - if let Some(i) = trees.iter().position(|x| x == &name) { + if let Some(i) = this.trees.iter().position(|x| x == &name) { Ok(i) } else { - trace!("open tree {}: lock db", name); - let db = self.db.lock().unwrap(); trace!("create table {}", name); - - db.execute( + this.db.execute( &format!( "CREATE TABLE IF NOT EXISTS {} ( k BLOB PRIMARY KEY, @@ -75,10 +75,10 @@ impl IDb for SqliteDb { ), [], )?; - trace!("table created: {}", name); + trace!("table created: {}, unlocking", name); - let i = trees.len(); - trees.push(name.to_string()); + let i = this.trees.len(); + this.trees.push(name.to_string()); Ok(i) } } @@ -87,10 +87,10 @@ impl IDb for SqliteDb { let mut trees = vec![]; trace!("list_trees: lock db"); - let db = self.db.lock().unwrap(); + let this = self.0.lock().unwrap(); trace!("list_trees: lock acquired"); - let mut stmt = db.prepare( + let mut stmt = this.db.prepare( "SELECT name FROM sqlite_schema WHERE type = 'table' AND name LIKE 'tree_%'", )?; let mut rows = stmt.query([])?; @@ -106,13 +106,15 @@ impl IDb for SqliteDb { // ---- fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value<'_>>> { - let tree = self.get_tree(tree)?; - trace!("get {}: lock db", tree); - let db = self.db.lock().unwrap(); + let this = self.0.lock().unwrap(); trace!("get {}: lock acquired", tree); - let mut stmt = db.prepare(&format!("SELECT v FROM {} WHERE k = ?1", tree))?; + let tree = this.get_tree(tree)?; + + let mut stmt = this + .db + .prepare(&format!("SELECT v FROM {} WHERE k = ?1", tree))?; let mut res_iter = stmt.query([key])?; match res_iter.next()? { None => Ok(None), @@ -121,24 +123,24 @@ impl IDb for SqliteDb { } fn remove(&self, tree: usize, key: &[u8]) -> Result<bool> { - let tree = self.get_tree(tree)?; - trace!("remove {}: lock db", tree); - let db = self.db.lock().unwrap(); + let this = self.0.lock().unwrap(); trace!("remove {}: lock acquired", tree); - let res = db.execute(&format!("DELETE FROM {} WHERE k = ?1", tree), params![key])?; + let tree = this.get_tree(tree)?; + let res = this + .db + .execute(&format!("DELETE FROM {} WHERE k = ?1", tree), params![key])?; Ok(res > 0) } fn len(&self, tree: usize) -> Result<usize> { - let tree = self.get_tree(tree)?; - trace!("len {}: lock db", tree); - let db = self.db.lock().unwrap(); + let this = self.0.lock().unwrap(); trace!("len {}: lock acquired", tree); - let mut stmt = db.prepare(&format!("SELECT COUNT(*) FROM {}", tree))?; + let tree = this.get_tree(tree)?; + let mut stmt = this.db.prepare(&format!("SELECT COUNT(*) FROM {}", tree))?; let mut res_iter = stmt.query([])?; match res_iter.next()? { None => Ok(0), @@ -147,13 +149,12 @@ impl IDb for SqliteDb { } fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()> { - let tree = self.get_tree(tree)?; - trace!("insert {}: lock db", tree); - let db = self.db.lock().unwrap(); + let this = self.0.lock().unwrap(); trace!("insert {}: lock acquired", tree); - db.execute( + let tree = this.get_tree(tree)?; + this.db.execute( &format!("INSERT OR REPLACE INTO {} (k, v) VALUES (?1, ?2)", tree), params![key, value], )?; @@ -161,25 +162,23 @@ impl IDb for SqliteDb { } fn iter(&self, tree: usize) -> Result<ValueIter<'_>> { - let tree = self.get_tree(tree)?; - let sql = format!("SELECT k, v FROM {} ORDER BY k ASC", tree); - trace!("iter {}: lock db", tree); - let db = self.db.lock().unwrap(); + let this = self.0.lock().unwrap(); trace!("iter {}: lock acquired", tree); - DbValueIterator::make(db, &sql, []) + let tree = this.get_tree(tree)?; + let sql = format!("SELECT k, v FROM {} ORDER BY k ASC", tree); + DbValueIterator::make(this, &sql, []) } fn iter_rev(&self, tree: usize) -> Result<ValueIter<'_>> { - let tree = self.get_tree(tree)?; - let sql = format!("SELECT k, v FROM {} ORDER BY k DESC", tree); - trace!("iter_rev {}: lock db", tree); - let db = self.db.lock().unwrap(); + let this = self.0.lock().unwrap(); trace!("iter_rev {}: lock acquired", tree); - DbValueIterator::make(db, &sql, []) + let tree = this.get_tree(tree)?; + let sql = format!("SELECT k, v FROM {} ORDER BY k DESC", tree); + DbValueIterator::make(this, &sql, []) } fn range<'r>( @@ -188,7 +187,11 @@ impl IDb for SqliteDb { low: Bound<&'r [u8]>, high: Bound<&'r [u8]>, ) -> Result<ValueIter<'_>> { - let tree = self.get_tree(tree)?; + trace!("range {}: lock db", tree); + let this = self.0.lock().unwrap(); + trace!("range {}: lock acquired", tree); + + let tree = this.get_tree(tree)?; let (bounds_sql, params) = bounds_sql(low, high); let sql = format!("SELECT k, v FROM {} {} ORDER BY k ASC", tree, bounds_sql); @@ -198,11 +201,7 @@ impl IDb for SqliteDb { .map(|x| x as &dyn rusqlite::ToSql) .collect::<Vec<_>>(); - trace!("range {}: lock db", tree); - let db = self.db.lock().unwrap(); - trace!("range {}: lock acquired", tree); - - DbValueIterator::make::<&[&dyn rusqlite::ToSql]>(db, &sql, params.as_ref()) + DbValueIterator::make::<&[&dyn rusqlite::ToSql]>(this, &sql, params.as_ref()) } fn range_rev<'r>( &self, @@ -210,7 +209,11 @@ impl IDb for SqliteDb { low: Bound<&'r [u8]>, high: Bound<&'r [u8]>, ) -> Result<ValueIter<'_>> { - let tree = self.get_tree(tree)?; + trace!("range_rev {}: lock db", tree); + let this = self.0.lock().unwrap(); + trace!("range_rev {}: lock acquired", tree); + + let tree = this.get_tree(tree)?; let (bounds_sql, params) = bounds_sql(low, high); let sql = format!("SELECT k, v FROM {} {} ORDER BY k DESC", tree, bounds_sql); @@ -220,25 +223,21 @@ impl IDb for SqliteDb { .map(|x| x as &dyn rusqlite::ToSql) .collect::<Vec<_>>(); - trace!("range_rev {}: lock db", tree); - let db = self.db.lock().unwrap(); - trace!("range_rev {}: lock acquired", tree); - - DbValueIterator::make::<&[&dyn rusqlite::ToSql]>(db, &sql, params.as_ref()) + DbValueIterator::make::<&[&dyn rusqlite::ToSql]>(this, &sql, params.as_ref()) } // ---- fn transaction(&self, f: &dyn ITxFn) -> TxResult<(), ()> { - let trees = self.trees.read().unwrap(); - trace!("transaction: lock db"); - let mut db = self.db.lock().unwrap(); + let mut this = self.0.lock().unwrap(); trace!("transaction: lock acquired"); + let this_mut_ref: &mut SqliteDbInner = this.borrow_mut(); + let mut tx = SqliteTx { - tx: db.transaction()?, - trees: trees.as_ref(), + tx: this_mut_ref.db.transaction()?, + trees: &this_mut_ref.trees, }; let res = match f.try_on(&mut tx) { TxFnResult::Ok => { @@ -345,7 +344,7 @@ impl<'a> ITx for SqliteTx<'a> { // ---- struct DbValueIterator<'a> { - db: MutexGuard<'a, Connection>, + db: MutexGuard<'a, SqliteDbInner>, stmt: Option<Statement<'a>>, iter: Option<Rows<'a>>, _pin: PhantomPinned, @@ -353,7 +352,7 @@ struct DbValueIterator<'a> { impl<'a> DbValueIterator<'a> { fn make<P: rusqlite::Params>( - db: MutexGuard<'a, Connection>, + db: MutexGuard<'a, SqliteDbInner>, sql: &str, args: P, ) -> Result<ValueIter<'a>> { @@ -368,7 +367,7 @@ impl<'a> DbValueIterator<'a> { unsafe { let db = NonNull::from(&boxed.db); - let stmt = db.as_ref().prepare(sql)?; + let stmt = db.as_ref().db.prepare(sql)?; let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed); Pin::get_unchecked_mut(mut_ref).stmt = Some(stmt); diff --git a/src/table/gc.rs b/src/table/gc.rs index e2611389..2a23f4cc 100644 --- a/src/table/gc.rs +++ b/src/table/gc.rs @@ -101,18 +101,16 @@ where async fn gc_loop_iter(&self) -> Result<Option<Duration>, Error> { let now = now_msec(); - let mut entries = vec![]; - let mut excluded = vec![]; - // List entries in the GC todo list // These entries are put there when a tombstone is inserted in the table // (see update_entry in data.rs) + let mut candidates = vec![]; for entry_kv in self.data.gc_todo.iter()? { let (k, vhash) = entry_kv?; - let mut todo_entry = GcTodoEntry::parse(&k, &vhash); + let todo_entry = GcTodoEntry::parse(&k, &vhash); if todo_entry.deletion_time() > now { - if entries.is_empty() && excluded.is_empty() { + if candidates.is_empty() { // If the earliest entry in the todo list shouldn't yet be processed, // return a duration to wait in the loop return Ok(Some(Duration::from_millis( @@ -124,15 +122,23 @@ where } } - let vhash = Hash::try_from(&vhash[..]).unwrap(); + candidates.push(todo_entry); + if candidates.len() >= 2 * TABLE_GC_BATCH_SIZE { + break; + } + } + let mut entries = vec![]; + let mut excluded = vec![]; + for mut todo_entry in candidates { // Check if the tombstone is still the current value of the entry. // If not, we don't actually want to GC it, and we will remove it // from the gc_todo table later (below). + let vhash = todo_entry.value_hash; todo_entry.value = self .data .store - .get(&k[..])? + .get(&todo_entry.key[..])? .filter(|v| blake2sum(&v[..]) == vhash) .map(|v| v.to_vec()); |