From de9d6cddf709e686ada3d1e71de7b31d7704b8b5 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 12 Dec 2022 17:16:49 +0100 Subject: Prettier worker list table; remove useless CLI log messages --- src/block/resync.rs | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'src/block/resync.rs') diff --git a/src/block/resync.rs b/src/block/resync.rs index ada3ac54..875ead9b 100644 --- a/src/block/resync.rs +++ b/src/block/resync.rs @@ -477,27 +477,22 @@ impl Worker for ResyncWorker { format!("Block resync worker #{}", self.index + 1) } - fn info(&self) -> Option { + fn status(&self) -> WorkerStatus { let persisted = self.manager.resync.persisted.load(); if self.index >= persisted.n_workers { - return Some("(unused)".into()); + return WorkerStatus { + freeform: vec!["(unused)".into()], + ..Default::default() + }; } - let mut ret = vec![]; - ret.push(format!("tranquility = {}", persisted.tranquility)); - - let qlen = self.manager.resync.queue_len().unwrap_or(0); - if qlen > 0 { - ret.push(format!("{} blocks in queue", qlen)); - } - - let elen = self.manager.resync.errors_len().unwrap_or(0); - if elen > 0 { - ret.push(format!("{} blocks in error state", elen)); + WorkerStatus { + queue_length: Some(self.manager.resync.queue_len().unwrap_or(0) as u64), + tranquility: Some(persisted.tranquility), + persistent_errors: Some(self.manager.resync.errors_len().unwrap_or(0) as u64), + ..Default::default() } - - Some(ret.join(", ")) } async fn work(&mut self, _must_exit: &mut watch::Receiver) -> Result { -- cgit v1.2.3 From 9d82196945f751c825621573657cfead992b356b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 13 Dec 2022 12:24:30 +0100 Subject: cli: new worker info command --- src/block/resync.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/block/resync.rs') diff --git a/src/block/resync.rs b/src/block/resync.rs index 875ead9b..55d28c14 100644 --- a/src/block/resync.rs +++ b/src/block/resync.rs @@ -257,7 +257,7 @@ impl BlockResyncManager { if let Err(e) = &res { manager.metrics.resync_error_counter.add(1); - warn!("Error when resyncing {:?}: {}", hash, e); + error!("Error when resyncing {:?}: {}", hash, e); let err_counter = match self.errors.get(hash.as_slice())? { Some(ec) => ErrorCounter::decode(&ec).add1(now + 1), @@ -482,7 +482,7 @@ impl Worker for ResyncWorker { if self.index >= persisted.n_workers { return WorkerStatus { - freeform: vec!["(unused)".into()], + freeform: vec!["This worker is currently disabled".into()], ..Default::default() }; } -- cgit v1.2.3 From 687660b27f904422c689e09d2457293e5313d325 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 13 Dec 2022 14:23:45 +0100 Subject: Implement `block list-errors` and `block info` --- src/block/resync.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src/block/resync.rs') diff --git a/src/block/resync.rs b/src/block/resync.rs index 55d28c14..53b44774 100644 --- a/src/block/resync.rs +++ b/src/block/resync.rs @@ -540,9 +540,9 @@ impl Worker for ResyncWorker { /// and the time of the last try. /// Used to implement exponential backoff. #[derive(Clone, Copy, Debug)] -struct ErrorCounter { - errors: u64, - last_try: u64, +pub(crate) struct ErrorCounter { + pub(crate) errors: u64, + pub(crate) last_try: u64, } impl ErrorCounter { @@ -553,12 +553,13 @@ impl ErrorCounter { } } - fn decode(data: &[u8]) -> Self { + pub(crate) fn decode(data: &[u8]) -> Self { Self { errors: u64::from_be_bytes(data[0..8].try_into().unwrap()), last_try: u64::from_be_bytes(data[8..16].try_into().unwrap()), } } + fn encode(&self) -> Vec { [ u64::to_be_bytes(self.errors), @@ -578,7 +579,8 @@ impl ErrorCounter { (RESYNC_RETRY_DELAY.as_millis() as u64) << std::cmp::min(self.errors - 1, RESYNC_RETRY_DELAY_MAX_BACKOFF_POWER) } - fn next_try(&self) -> u64 { + + pub(crate) fn next_try(&self) -> u64 { self.last_try + self.delay_msec() } } -- cgit v1.2.3 From d7f90cabb0517a50a6c3dd702852770240566bfc Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 13 Dec 2022 15:02:42 +0100 Subject: Implement `block retry-now` and `block purge` --- src/block/resync.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/block/resync.rs') diff --git a/src/block/resync.rs b/src/block/resync.rs index 53b44774..8231b55d 100644 --- a/src/block/resync.rs +++ b/src/block/resync.rs @@ -123,6 +123,24 @@ impl BlockResyncManager { Ok(self.errors.len()) } + /// Clear the error counter for a block and put it in queue immediately + pub fn clear_backoff(&self, hash: &Hash) -> Result<(), Error> { + let now = now_msec(); + if let Some(ec) = self.errors.get(hash)? { + let mut ec = ErrorCounter::decode(&ec); + if ec.errors > 0 { + ec.last_try = now - ec.delay_msec(); + self.errors.insert(hash, ec.encode())?; + self.put_to_resync_at(hash, now)?; + return Ok(()); + } + } + Err(Error::Message(format!( + "Block {:?} was not in an errored state", + hash + ))) + } + // ---- Resync loop ---- // This part manages a queue of blocks that need to be -- cgit v1.2.3 From dfc131850a09e7ceacfa98315adbef156e07e9ca Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 14 Dec 2022 15:25:29 +0100 Subject: Simplified and more aggressive worker exit logic --- src/block/resync.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/block/resync.rs') diff --git a/src/block/resync.rs b/src/block/resync.rs index 8231b55d..51bb9846 100644 --- a/src/block/resync.rs +++ b/src/block/resync.rs @@ -540,7 +540,7 @@ impl Worker for ResyncWorker { } } - async fn wait_for_work(&mut self, _must_exit: &watch::Receiver) -> WorkerState { + async fn wait_for_work(&mut self) -> WorkerState { while self.index >= self.manager.resync.persisted.load().n_workers { self.manager.resync.notify.notified().await } -- cgit v1.2.3 From cdb2a591e9d393d24ab5c49bb905b0589b193299 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 3 Jan 2023 14:44:47 +0100 Subject: Refactor how things are migrated --- src/block/resync.rs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/block/resync.rs') diff --git a/src/block/resync.rs b/src/block/resync.rs index 51bb9846..9c7b3b0e 100644 --- a/src/block/resync.rs +++ b/src/block/resync.rs @@ -63,6 +63,7 @@ struct ResyncPersistedConfig { n_workers: usize, tranquility: u32, } +impl garage_util::migrate::InitialFormat for ResyncPersistedConfig {} enum ResyncIterResult { BusyDidSomething, -- cgit v1.2.3