aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex <alex@adnab.me>2024-03-15 09:22:33 +0000
committerAlex <alex@adnab.me>2024-03-15 09:22:33 +0000
commita80ce6ab5ad9834c3721eeb4f626d53c9a8bb1f4 (patch)
tree36cabc0cdee72d13b778e5668c454b35e842c293
parent62b01d8705b8525cfe323aecf4ea085ac8671a8e (diff)
parent990205dc3b2f552e9b68168c0aad3e96e2d6f2f0 (diff)
downloadgarage-a80ce6ab5ad9834c3721eeb4f626d53c9a8bb1f4.tar.gz
garage-a80ce6ab5ad9834c3721eeb4f626d53c9a8bb1f4.zip
Merge pull request '`disable_scrub` configuration option' (#774) from disable-scrub into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/774
-rw-r--r--doc/book/operations/durability-repairs.md2
-rw-r--r--doc/book/reference-manual/configuration.md18
-rw-r--r--src/block/manager.rs68
-rw-r--r--src/model/garage.rs9
-rw-r--r--src/util/config.rs4
5 files changed, 61 insertions, 40 deletions
diff --git a/doc/book/operations/durability-repairs.md b/doc/book/operations/durability-repairs.md
index 578899a8..f4450dae 100644
--- a/doc/book/operations/durability-repairs.md
+++ b/doc/book/operations/durability-repairs.md
@@ -19,7 +19,7 @@ connecting to. To run on all nodes, add the `-a` flag as follows:
# Data block operations
-## Data store scrub
+## Data store scrub {#scrub}
Scrubbing the data store means examining each individual data block to check that
its content is correct, by verifying its hash. Any block found to be corrupted
diff --git a/doc/book/reference-manual/configuration.md b/doc/book/reference-manual/configuration.md
index 81af1de0..8e87b7d8 100644
--- a/doc/book/reference-manual/configuration.md
+++ b/doc/book/reference-manual/configuration.md
@@ -14,6 +14,7 @@ metadata_dir = "/var/lib/garage/meta"
data_dir = "/var/lib/garage/data"
metadata_fsync = true
data_fsync = false
+disable_scrub = false
db_engine = "lmdb"
@@ -87,6 +88,7 @@ Top-level configuration options:
[`data_dir`](#data_dir),
[`data_fsync`](#data_fsync),
[`db_engine`](#db_engine),
+[`disable_scrub`](#disable_scrub),
[`lmdb_map_size`](#lmdb_map_size),
[`metadata_dir`](#metadata_dir),
[`metadata_fsync`](#metadata_fsync),
@@ -344,6 +346,22 @@ at the cost of a moderate drop in write performance.
Similarly to `metadata_fsync`, this is likely not necessary
if geographical replication is used.
+#### `disable_scrub` {#disable_scrub}
+
+By default, Garage runs a scrub of the data directory approximately once per
+month, with a random delay to avoid all nodes running at the same time. When
+it scrubs the data directory, Garage will read all of the data files stored on
+disk to check their integrity, and will rebuild any data files that it finds
+corrupted, using the remaining valid copies stored on other nodes.
+See [this page](@/documentation/operations/durability-repairs.md#scrub) for details.
+
+Set the `disable_scrub` configuration value to `true` if you don't need Garage
+to scrub the data directory, for instance if you are already scrubbing at the
+filesystem level. Note that in this case, if you find a corrupted data file,
+you should delete it from the data directory and then call `garage repair
+blocks` on the node to ensure that it re-obtains a copy from another node on
+the network.
+
#### `block_size` {#block_size}
Garage splits stored objects in consecutive chunks of size `block_size`
diff --git a/src/block/manager.rs b/src/block/manager.rs
index 890ea8b7..ef7279e9 100644
--- a/src/block/manager.rs
+++ b/src/block/manager.rs
@@ -22,7 +22,7 @@ use garage_net::stream::{read_stream_to_end, stream_asyncread, ByteStream};
use garage_db as db;
use garage_util::background::{vars, BackgroundRunner};
-use garage_util::config::DataDirEnum;
+use garage_util::config::Config;
use garage_util::data::*;
use garage_util::error::*;
use garage_util::metrics::RecordDuration;
@@ -84,6 +84,7 @@ pub struct BlockManager {
data_fsync: bool,
compression_level: Option<i32>,
+ disable_scrub: bool,
mutation_lock: Vec<Mutex<BlockManagerLocked>>,
@@ -119,9 +120,7 @@ struct BlockManagerLocked();
impl BlockManager {
pub fn new(
db: &db::Db,
- data_dir: DataDirEnum,
- data_fsync: bool,
- compression_level: Option<i32>,
+ config: &Config,
replication: TableShardedReplication,
system: Arc<System>,
) -> Result<Arc<Self>, Error> {
@@ -131,11 +130,13 @@ impl BlockManager {
let data_layout = match data_layout_persister.load() {
Ok(mut layout) => {
layout
- .update(&data_dir)
+ .update(&config.data_dir)
.ok_or_message("invalid data_dir config")?;
layout
}
- Err(_) => DataLayout::initialize(&data_dir).ok_or_message("invalid data_dir config")?,
+ Err(_) => {
+ DataLayout::initialize(&config.data_dir).ok_or_message("invalid data_dir config")?
+ }
};
data_layout_persister
.save(&data_layout)
@@ -154,7 +155,7 @@ impl BlockManager {
.endpoint("garage_block/manager.rs/Rpc".to_string());
let metrics = BlockManagerMetrics::new(
- compression_level,
+ config.compression_level,
rc.rc.clone(),
resync.queue.clone(),
resync.errors.clone(),
@@ -166,8 +167,9 @@ impl BlockManager {
replication,
data_layout: ArcSwap::new(Arc::new(data_layout)),
data_layout_persister,
- data_fsync,
- compression_level,
+ data_fsync: config.data_fsync,
+ disable_scrub: config.disable_scrub,
+ compression_level: config.compression_level,
mutation_lock: vec![(); MUTEX_COUNT]
.iter()
.map(|_| Mutex::new(BlockManagerLocked()))
@@ -194,33 +196,37 @@ impl BlockManager {
}
// Spawn scrub worker
- let (scrub_tx, scrub_rx) = mpsc::channel(1);
- self.tx_scrub_command.store(Some(Arc::new(scrub_tx)));
- bg.spawn_worker(ScrubWorker::new(
- self.clone(),
- scrub_rx,
- self.scrub_persister.clone(),
- ));
+ if !self.disable_scrub {
+ let (scrub_tx, scrub_rx) = mpsc::channel(1);
+ self.tx_scrub_command.store(Some(Arc::new(scrub_tx)));
+ bg.spawn_worker(ScrubWorker::new(
+ self.clone(),
+ scrub_rx,
+ self.scrub_persister.clone(),
+ ));
+ }
}
pub fn register_bg_vars(&self, vars: &mut vars::BgVars) {
self.resync.register_bg_vars(vars);
- vars.register_rw(
- &self.scrub_persister,
- "scrub-tranquility",
- |p| p.get_with(|x| x.tranquility),
- |p, tranquility| p.set_with(|x| x.tranquility = tranquility),
- );
- vars.register_ro(&self.scrub_persister, "scrub-last-completed", |p| {
- p.get_with(|x| msec_to_rfc3339(x.time_last_complete_scrub))
- });
- vars.register_ro(&self.scrub_persister, "scrub-next-run", |p| {
- p.get_with(|x| msec_to_rfc3339(x.time_next_run_scrub))
- });
- vars.register_ro(&self.scrub_persister, "scrub-corruptions_detected", |p| {
- p.get_with(|x| x.corruptions_detected)
- });
+ if !self.disable_scrub {
+ vars.register_rw(
+ &self.scrub_persister,
+ "scrub-tranquility",
+ |p| p.get_with(|x| x.tranquility),
+ |p, tranquility| p.set_with(|x| x.tranquility = tranquility),
+ );
+ vars.register_ro(&self.scrub_persister, "scrub-last-completed", |p| {
+ p.get_with(|x| msec_to_rfc3339(x.time_last_complete_scrub))
+ });
+ vars.register_ro(&self.scrub_persister, "scrub-next-run", |p| {
+ p.get_with(|x| msec_to_rfc3339(x.time_next_run_scrub))
+ });
+ vars.register_ro(&self.scrub_persister, "scrub-corruptions_detected", |p| {
+ p.get_with(|x| x.corruptions_detected)
+ });
+ }
}
/// Ask nodes that might have a (possibly compressed) block for it
diff --git a/src/model/garage.rs b/src/model/garage.rs
index 18421ca3..acf943f6 100644
--- a/src/model/garage.rs
+++ b/src/model/garage.rs
@@ -177,14 +177,7 @@ impl Garage {
};
info!("Initialize block manager...");
- let block_manager = BlockManager::new(
- &db,
- config.data_dir.clone(),
- config.data_fsync,
- config.compression_level,
- data_rep_param,
- system.clone(),
- )?;
+ let block_manager = BlockManager::new(&db, &config, data_rep_param, system.clone())?;
block_manager.register_bg_vars(&mut bg_vars);
// ---- admin tables ----
diff --git a/src/util/config.rs b/src/util/config.rs
index 056c625d..7338a506 100644
--- a/src/util/config.rs
+++ b/src/util/config.rs
@@ -23,6 +23,10 @@ pub struct Config {
#[serde(default)]
pub data_fsync: bool,
+ /// Disable automatic scrubbing of the data directory
+ #[serde(default)]
+ pub disable_scrub: bool,
+
/// Size of data blocks to save to disk
#[serde(
deserialize_with = "deserialize_capacity",