aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alex Auvolat <alex@adnab.me> 2022-09-13 16:22:23 +0200
committer Alex Auvolat <alex@adnab.me> 2022-09-13 16:22:23 +0200
commit ab722cb40f5aacf661a280b7eb025acd3aefc1bb (patch)
tree 20c275ab7019b9c1458e4c2daef56a1b93411f8a
parent 38be811b1cd20d9223b481c0ea91cc7e3ee795dc (diff)
download garage-various-fixes-for-0.8.tar.gz
garage-various-fixes-for-0.8.zip
Add checks on replication_factor of layouts we use (fix #363, fix #364) various-fixes-for-0.8
-rw-r--r-- src/model/garage.rs 2
-rw-r--r-- src/rpc/system.rs 30
2 files changed, 26 insertions, 6 deletions
diff --git a/src/model/garage.rs b/src/model/garage.rs
index ec1ec956..75012952 100644
--- a/src/model/garage.rs
+++ b/src/model/garage.rs
@@ -169,7 +169,7 @@ impl Garage {
background.clone(),
replication_mode.replication_factor(),
&config,
- );
+ )?;
let data_rep_param = TableShardedReplication {
system: system.clone(),
diff --git a/src/rpc/system.rs b/src/rpc/system.rs
index c0e70c61..228b66a4 100644
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@@ -198,7 +198,7 @@ impl System {
background: Arc<BackgroundRunner>,
replication_factor: usize,
config: &Config,
- ) -> Arc<Self> {
+ ) -> Result<Arc<Self>, Error> {
let node_key =
gen_node_key(&config.metadata_dir).expect("Unable to read or generate node ID");
info!(
@@ -206,11 +206,21 @@ impl System {
hex::encode(&node_key.public_key()[..8])
);
- let persist_cluster_layout = Persister::new(&config.metadata_dir, "cluster_layout");
+ let persist_cluster_layout: Persister<ClusterLayout> =
+ Persister::new(&config.metadata_dir, "cluster_layout");
let persist_peer_list = Persister::new(&config.metadata_dir, "peer_list");
let cluster_layout = match persist_cluster_layout.load() {
- Ok(x) => x,
+ Ok(x) => {
+ if x.replication_factor != replication_factor {
+ return Err(Error::Message(format!(
+ "Prevous cluster layout has replication factor {}, which is different than the one specified in the config file ({}). The previous cluster layout can be purged, if you know what you are doing, simply by deleting the `cluster_layout` file in your metadata directory.",
+ x.replication_factor,
+ replication_factor
+ )));
+ }
+ x
+ }
Err(e) => {
info!(
"No valid previous cluster layout stored ({}), starting fresh.",
@@ -303,7 +313,7 @@ impl System {
metadata_dir: config.metadata_dir.clone(),
});
sys.system_endpoint.set_handler(sys.clone());
- sys
+ Ok(sys)
}
/// Perform bootstraping, starting the ping loop
@@ -485,7 +495,7 @@ impl System {
let local_info = self.local_status.load();
if local_info.replication_factor < info.replication_factor {
- error!("Some node have a higher replication factor ({}) than this one ({}). This is not supported and might lead to bugs",
+ error!("Some node have a higher replication factor ({}) than this one ({}). This is not supported and will lead to data corruption. Shutting down for safety.",
info.replication_factor,
local_info.replication_factor);
std::process::exit(1);
@@ -513,6 +523,16 @@ impl System {
self: &Arc<Self>,
adv: &ClusterLayout,
) -> Result<SystemRpc, Error> {
+ if adv.replication_factor != self.replication_factor {
+ let msg = format!(
+ "Received a cluster layout from another node with replication factor {}, which is different from what we have in our configuration ({}). Discarding the cluster layout we received.",
+ adv.replication_factor,
+ self.replication_factor
+ );
+ error!("{}", msg);
+ return Err(Error::Message(msg));
+ }
+
let update_ring = self.update_ring.lock().await;
let mut layout: ClusterLayout = self.ring.borrow().layout.clone();