aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/block/manager.rs3
-rw-r--r--src/block/repair.rs48
2 files changed, 40 insertions, 11 deletions
diff --git a/src/block/manager.rs b/src/block/manager.rs
index 051a9f93..bfa66069 100644
--- a/src/block/manager.rs
+++ b/src/block/manager.rs
@@ -185,6 +185,9 @@ impl BlockManager {
vars.register_ro(&self.scrub_persister, "scrub-last-completed", |p| {
p.get_with(|x| msec_to_rfc3339(x.time_last_complete_scrub))
});
+ vars.register_ro(&self.scrub_persister, "scrub-next-run", |p| {
+ p.get_with(|x| msec_to_rfc3339(x.time_next_run_scrub))
+ });
vars.register_ro(&self.scrub_persister, "scrub-corruptions_detected", |p| {
p.get_with(|x| x.corruptions_detected)
});
diff --git a/src/block/repair.rs b/src/block/repair.rs
index d4593dbf..10d46291 100644
--- a/src/block/repair.rs
+++ b/src/block/repair.rs
@@ -4,6 +4,7 @@ use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
+use rand::Rng;
use serde::{Deserialize, Serialize};
use tokio::fs;
use tokio::select;
@@ -19,8 +20,8 @@ use garage_util::tranquilizer::Tranquilizer;
use crate::manager::*;
-// Full scrub every 30 days
-const SCRUB_INTERVAL: Duration = Duration::from_secs(3600 * 24 * 30);
+// Full scrub every 25 days with a random element of 10 days mixed in below
+const SCRUB_INTERVAL: Duration = Duration::from_secs(3600 * 24 * 25);
// Scrub tranquility is initially set to 4, but can be changed in the CLI
// and the updated version is persisted over Garage restarts
const INITIAL_SCRUB_TRANQUILITY: u32 = 4;
@@ -175,13 +176,30 @@ pub struct ScrubWorker {
pub struct ScrubWorkerPersisted {
pub tranquility: u32,
pub(crate) time_last_complete_scrub: u64,
+ pub(crate) time_next_run_scrub: u64,
pub(crate) corruptions_detected: u64,
}
+
+fn randomize_next_scrub_run_time() -> u64 {
+ // Take SCRUB_INTERVAL and mix in a random interval of 10 days to attempt to
+ // balance scrub load across different cluster nodes.
+
+ let next_run_timestamp = now_msec()
+ + SCRUB_INTERVAL
+ .saturating_add(Duration::from_secs(
+ rand::thread_rng().gen_range(0..3600 * 24 * 10),
+ ))
+ .as_millis() as u64;
+
+ next_run_timestamp
+}
+
impl garage_util::migrate::InitialFormat for ScrubWorkerPersisted {}
impl Default for ScrubWorkerPersisted {
fn default() -> Self {
ScrubWorkerPersisted {
time_last_complete_scrub: 0,
+ time_next_run_scrub: randomize_next_scrub_run_time(),
tranquility: INITIAL_SCRUB_TRANQUILITY,
corruptions_detected: 0,
}
@@ -279,12 +297,13 @@ impl Worker for ScrubWorker {
}
fn status(&self) -> WorkerStatus {
- let (corruptions_detected, tranquility, time_last_complete_scrub) =
+ let (corruptions_detected, tranquility, time_last_complete_scrub, time_next_run_scrub) =
self.persister.get_with(|p| {
(
p.corruptions_detected,
p.tranquility,
p.time_last_complete_scrub,
+ p.time_next_run_scrub,
)
});
@@ -302,10 +321,16 @@ impl Worker for ScrubWorker {
s.freeform = vec![format!("Scrub paused, resumes at {}", msec_to_rfc3339(*rt))];
}
ScrubWorkerState::Finished => {
- s.freeform = vec![format!(
- "Last scrub completed at {}",
- msec_to_rfc3339(time_last_complete_scrub)
- )];
+ s.freeform = vec![
+ format!(
+ "Last scrub completed at {}",
+ msec_to_rfc3339(time_last_complete_scrub),
+ ),
+ format!(
+ "Next scrub scheduled for {}",
+ msec_to_rfc3339(time_next_run_scrub)
+ ),
+ ];
}
}
s
@@ -334,8 +359,10 @@ impl Worker for ScrubWorker {
.tranquilizer
.tranquilize_worker(self.persister.get_with(|p| p.tranquility)))
} else {
- self.persister
- .set_with(|p| p.time_last_complete_scrub = now_msec())?;
+ self.persister.set_with(|p| {
+ p.time_last_complete_scrub = now_msec();
+ p.time_next_run_scrub = randomize_next_scrub_run_time();
+ })?;
self.work = ScrubWorkerState::Finished;
self.tranquilizer.clear();
Ok(WorkerState::Idle)
@@ -350,8 +377,7 @@ impl Worker for ScrubWorker {
ScrubWorkerState::Running(_) => return WorkerState::Busy,
ScrubWorkerState::Paused(_, resume_time) => (*resume_time, ScrubWorkerCommand::Resume),
ScrubWorkerState::Finished => (
- self.persister.get_with(|p| p.time_last_complete_scrub)
- + SCRUB_INTERVAL.as_millis() as u64,
+ self.persister.get_with(|p| p.time_next_run_scrub),
ScrubWorkerCommand::Start,
),
};