| author | Alex Auvolat <alex@adnab.me> | 2022-09-13 16:22:23 +0200 |
|---|---|---|
| committer | Alex Auvolat <alex@adnab.me> | 2022-09-13 16:22:23 +0200 |
| commit | ab722cb40f5aacf661a280b7eb025acd3aefc1bb (patch) | |
| tree | 20c275ab7019b9c1458e4c2daef56a1b93411f8a | |
| parent | 38be811b1cd20d9223b481c0ea91cc7e3ee795dc (diff) | |
| download | garage-ab722cb40f5aacf661a280b7eb025acd3aefc1bb.tar.gz, garage-ab722cb40f5aacf661a280b7eb025acd3aefc1bb.zip | |
Add checks on replication_factor of layouts we use (fix #363, fix #364) [branch: various-fixes-for-0.8]
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/model/garage.rs | 2 |
| -rw-r--r-- | src/rpc/system.rs | 30 |

2 files changed, 26 insertions, 6 deletions
```diff
diff --git a/src/model/garage.rs b/src/model/garage.rs
index ec1ec956..75012952 100644
--- a/src/model/garage.rs
+++ b/src/model/garage.rs
@@ -169,7 +169,7 @@ impl Garage {
             background.clone(),
             replication_mode.replication_factor(),
             &config,
-        );
+        )?;
 
         let data_rep_param = TableShardedReplication {
             system: system.clone(),
diff --git a/src/rpc/system.rs b/src/rpc/system.rs
index c0e70c61..228b66a4 100644
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@@ -198,7 +198,7 @@ impl System {
         background: Arc<BackgroundRunner>,
         replication_factor: usize,
         config: &Config,
-    ) -> Arc<Self> {
+    ) -> Result<Arc<Self>, Error> {
         let node_key =
             gen_node_key(&config.metadata_dir).expect("Unable to read or generate node ID");
         info!(
@@ -206,11 +206,21 @@ impl System {
             hex::encode(&node_key.public_key()[..8])
         );
 
-        let persist_cluster_layout = Persister::new(&config.metadata_dir, "cluster_layout");
+        let persist_cluster_layout: Persister<ClusterLayout> =
+            Persister::new(&config.metadata_dir, "cluster_layout");
         let persist_peer_list = Persister::new(&config.metadata_dir, "peer_list");
 
         let cluster_layout = match persist_cluster_layout.load() {
-            Ok(x) => x,
+            Ok(x) => {
+                if x.replication_factor != replication_factor {
+                    return Err(Error::Message(format!(
+                        "Previous cluster layout has replication factor {}, which is different from the one specified in the config file ({}). The previous cluster layout can be purged, if you know what you are doing, simply by deleting the `cluster_layout` file in your metadata directory.",
+                        x.replication_factor,
+                        replication_factor
+                    )));
+                }
+                x
+            }
             Err(e) => {
                 info!(
                     "No valid previous cluster layout stored ({}), starting fresh.",
@@ -303,7 +313,7 @@ impl System {
             metadata_dir: config.metadata_dir.clone(),
         });
         sys.system_endpoint.set_handler(sys.clone());
-        sys
+        Ok(sys)
     }
 
     /// Perform bootstraping, starting the ping loop
@@ -485,7 +495,7 @@ impl System {
         let local_info = self.local_status.load();
 
         if local_info.replication_factor < info.replication_factor {
-            error!("Some node have a higher replication factor ({}) than this one ({}). This is not supported and might lead to bugs",
+            error!("Some node has a higher replication factor ({}) than this one ({}). This is not supported and will lead to data corruption. Shutting down for safety.",
                 info.replication_factor,
                 local_info.replication_factor);
             std::process::exit(1);
@@ -513,6 +523,16 @@ impl System {
         self: &Arc<Self>,
         adv: &ClusterLayout,
     ) -> Result<SystemRpc, Error> {
+        if adv.replication_factor != self.replication_factor {
+            let msg = format!(
+                "Received a cluster layout from another node with replication factor {}, which is different from what we have in our configuration ({}). Discarding the cluster layout we received.",
+                adv.replication_factor,
+                self.replication_factor
+            );
+            error!("{}", msg);
+            return Err(Error::Message(msg));
+        }
+
         let update_ring = self.update_ring.lock().await;
         let mut layout: ClusterLayout = self.ring.borrow().layout.clone();
```
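Taken together, the commit enforces the same invariant in two places: once when loading a persisted layout at startup (refuse to start), and once when a peer advertises a layout over RPC (discard it and return an error). Below is a minimal, self-contained sketch of the startup-time check; the `ClusterLayout` and `Error` types here are simplified stand-ins for illustration, not Garage's real definitions:

```rust
// Hypothetical stand-in for Garage's ClusterLayout: only the field
// relevant to this check is modeled.
#[derive(Debug)]
struct ClusterLayout {
    replication_factor: usize,
}

// Hypothetical stand-in for Garage's Error type.
#[derive(Debug)]
enum Error {
    Message(String),
}

// A layout persisted with one replication factor must not be reused by a
// node configured with another: refuse to proceed instead of silently
// mixing the two.
fn check_layout(layout: ClusterLayout, configured: usize) -> Result<ClusterLayout, Error> {
    if layout.replication_factor != configured {
        return Err(Error::Message(format!(
            "Previous cluster layout has replication factor {}, but the config file specifies {}",
            layout.replication_factor, configured
        )));
    }
    Ok(layout)
}

fn main() {
    // A layout saved with replication factor 3 is rejected by a node
    // configured with replication factor 2.
    let saved = ClusterLayout { replication_factor: 3 };
    match check_layout(saved, 2) {
        Ok(layout) => println!("layout accepted: {:?}", layout),
        Err(Error::Message(msg)) => eprintln!("refusing to start: {}", msg),
    }
}
```

Note the asymmetry in how the two checks react: the startup check aborts initialization (the constructor now returns `Result<Arc<Self>, Error>`), while the runtime check in `handle_advertise_cluster_layout` merely rejects the advertised layout, so a misconfigured peer cannot take down a healthy node.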