diff options
author | Alex Auvolat <alex@adnab.me> | 2022-09-06 22:13:01 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2022-09-06 22:13:01 +0200 |
commit | 6b958979bd898a576ee9c1021cc908b3ec75ffc7 (patch) | |
tree | 72e35a3b12c616965bcdf60bf18d1c0db29955ba /src/block/repair.rs | |
parent | 4024822585783368993ac26807d076d8c312bb35 (diff) | |
parent | d23b3a14fc28de164080e762f0e97e6cbc868940 (diff) | |
download | garage-6b958979bd898a576ee9c1021cc908b3ec75ffc7.tar.gz garage-6b958979bd898a576ee9c1021cc908b3ec75ffc7.zip |
Merge branch 'lx-perf-improvements' into netapp-stream-body
Diffstat (limited to 'src/block/repair.rs')
-rw-r--r-- | src/block/repair.rs | 34 |
1 files changed, 28 insertions, 6 deletions
```diff
diff --git a/src/block/repair.rs b/src/block/repair.rs
index 07ff6772..e2884b69 100644
--- a/src/block/repair.rs
+++ b/src/block/repair.rs
@@ -19,7 +19,17 @@ use garage_util::tranquilizer::Tranquilizer;
 
 use crate::manager::*;
 
-const SCRUB_INTERVAL: Duration = Duration::from_secs(3600 * 24 * 30); // full scrub every 30 days
+// Full scrub every 30 days
+const SCRUB_INTERVAL: Duration = Duration::from_secs(3600 * 24 * 30);
+// Scrub tranquility is initially set to 4, but can be changed in the CLI
+// and the updated version is persisted over Garage restarts
+const INITIAL_SCRUB_TRANQUILITY: u32 = 4;
+
+// ---- ---- ----
+// FIRST KIND OF REPAIR: FINDING MISSING BLOCKS/USELESS BLOCKS
+// This is a one-shot repair operation that can be launched,
+// checks everything, and then exits.
+// ---- ---- ----
 
 pub struct RepairWorker {
 	manager: Arc<BlockManager>,
@@ -102,7 +112,9 @@ impl Worker for RepairWorker {
 				}
 
 				for hash in batch_of_hashes.into_iter() {
-					self.manager.put_to_resync(&hash, Duration::from_secs(0))?;
+					self.manager
+						.resync
+						.put_to_resync(&hash, Duration::from_secs(0))?;
 					self.next_start = Some(hash)
 				}
 
@@ -114,7 +126,9 @@ impl Worker for RepairWorker {
 				// This allows us to find blocks we are storing but don't actually need,
 				// so that we can offload them if necessary and then delete them locally.
 				if let Some(hash) = bi.next().await? {
-					self.manager.put_to_resync(&hash, Duration::from_secs(0))?;
+					self.manager
+						.resync
+						.put_to_resync(&hash, Duration::from_secs(0))?;
 					Ok(WorkerState::Busy)
 				} else {
 					Ok(WorkerState::Done)
@@ -128,7 +142,13 @@ impl Worker for RepairWorker {
 	}
 }
 
-// ----
+// ---- ---- ----
+// SECOND KIND OF REPAIR: SCRUBBING THE DATASTORE
+// This is significantly more complex than the process above,
+// as it is a continuously-running task that triggers automatically
+// every SCRUB_INTERVAL, but can also be triggered manually
+// and whose parameter (esp. speed) can be controlled at runtime.
+// ---- ---- ----
 
 pub struct ScrubWorker {
 	manager: Arc<BlockManager>,
@@ -176,7 +196,7 @@ impl ScrubWorker {
 			Ok(v) => v,
 			Err(_) => ScrubWorkerPersisted {
 				time_last_complete_scrub: 0,
-				tranquility: 4,
+				tranquility: INITIAL_SCRUB_TRANQUILITY,
 				corruptions_detected: 0,
 			},
 		};
@@ -343,7 +363,9 @@ impl Worker for ScrubWorker {
 	}
 }
 
-// ----
+// ---- ---- ----
+// UTILITY FOR ENUMERATING THE BLOCK STORE
+// ---- ---- ----
 
 struct BlockStoreIterator {
 	path: Vec<ReadingDir>,
```