From 8a2b1dd422fb57abe611d8c1cf3cb0b55f487189 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 9 Nov 2023 12:55:36 +0100 Subject: wip: split out layout management from System into separate LayoutManager --- src/rpc/layout/manager.rs | 177 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 src/rpc/layout/manager.rs (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs new file mode 100644 index 00000000..a8a77139 --- /dev/null +++ b/src/rpc/layout/manager.rs @@ -0,0 +1,177 @@ +use std::sync::Arc; +use std::time::Duration; + +use tokio::sync::watch; +use tokio::sync::Mutex; + +use netapp::endpoint::Endpoint; +use netapp::peering::fullmesh::FullMeshPeeringStrategy; +use netapp::NodeID; + +use garage_util::config::Config; +use garage_util::data::*; +use garage_util::error::*; +use garage_util::persister::Persister; + +use super::*; +use crate::rpc_helper::*; +use crate::system::*; + +pub struct LayoutManager { + replication_factor: usize, + persist_cluster_layout: Persister, + + pub layout_watch: watch::Receiver>, + update_layout: Mutex>>, + + pub(crate) rpc_helper: RpcHelper, + system_endpoint: Arc>, +} + +impl LayoutManager { + pub fn new( + config: &Config, + node_id: NodeID, + system_endpoint: Arc>, + fullmesh: Arc, + replication_factor: usize, + ) -> Result { + let persist_cluster_layout: Persister = + Persister::new(&config.metadata_dir, "cluster_layout"); + + let cluster_layout = match persist_cluster_layout.load() { + Ok(x) => { + if x.current().replication_factor != replication_factor { + return Err(Error::Message(format!( + "Prevous cluster layout has replication factor {}, which is different than the one specified in the config file ({}). 
The previous cluster layout can be purged, if you know what you are doing, simply by deleting the `cluster_layout` file in your metadata directory.", + x.current().replication_factor, + replication_factor + ))); + } + x + } + Err(e) => { + info!( + "No valid previous cluster layout stored ({}), starting fresh.", + e + ); + LayoutHistory::new(replication_factor) + } + }; + + let (update_layout, layout_watch) = watch::channel(Arc::new(cluster_layout)); + + let rpc_helper = RpcHelper::new( + node_id.into(), + fullmesh, + layout_watch.clone(), + config.rpc_timeout_msec.map(Duration::from_millis), + ); + + Ok(Self { + replication_factor, + persist_cluster_layout, + layout_watch, + update_layout: Mutex::new(update_layout), + system_endpoint, + rpc_helper, + }) + } + + // ---- PUBLIC INTERFACE ---- + + pub async fn update_cluster_layout(&self, layout: &LayoutHistory) -> Result<(), Error> { + self.handle_advertise_cluster_layout(layout).await?; + Ok(()) + } + + pub fn history(&self) -> watch::Ref> { + self.layout_watch.borrow() + } + + pub(crate) async fn pull_cluster_layout(&self, peer: Uuid) { + let resp = self + .rpc_helper + .call( + &self.system_endpoint, + peer, + SystemRpc::PullClusterLayout, + RequestStrategy::with_priority(PRIO_HIGH), + ) + .await; + if let Ok(SystemRpc::AdvertiseClusterLayout(layout)) = resp { + let _: Result<_, _> = self.handle_advertise_cluster_layout(&layout).await; + } + } + + // ---- INTERNALS --- + + /// Save network configuration to disc + async fn save_cluster_layout(&self) -> Result<(), Error> { + let layout: Arc = self.layout_watch.borrow().clone(); + self.persist_cluster_layout + .save_async(&layout) + .await + .expect("Cannot save current cluster layout"); + Ok(()) + } + + // ---- RPC HANDLERS ---- + + pub(crate) fn handle_pull_cluster_layout(&self) -> SystemRpc { + let layout = self.layout_watch.borrow().as_ref().clone(); + SystemRpc::AdvertiseClusterLayout(layout) + } + + pub(crate) async fn handle_advertise_cluster_layout( + &self, + adv: &LayoutHistory, + ) -> Result { + if adv.current().replication_factor != self.replication_factor { + let msg = format!( + "Received a cluster layout from another node with replication factor {}, which is different from what we have in our configuration ({}). 
Discarding the cluster layout we received.", + adv.current().replication_factor, + self.replication_factor + ); + error!("{}", msg); + return Err(Error::Message(msg)); + } + + let update_layout = self.update_layout.lock().await; + // TODO: don't clone each time an AdvertiseClusterLayout is received + let mut layout: LayoutHistory = self.layout_watch.borrow().as_ref().clone(); + + let prev_layout_check = layout.check().is_ok(); + if layout.merge(adv) { + if prev_layout_check && layout.check().is_err() { + error!("New cluster layout is invalid, discarding."); + return Err(Error::Message( + "New cluster layout is invalid, discarding.".into(), + )); + } + + update_layout.send(Arc::new(layout.clone()))?; + drop(update_layout); + + /* TODO + tokio::spawn(async move { + if let Err(e) = system + .rpc_helper() + .broadcast( + &system.system_endpoint, + SystemRpc::AdvertiseClusterLayout(layout), + RequestStrategy::with_priority(PRIO_HIGH), + ) + .await + { + warn!("Error while broadcasting new cluster layout: {}", e); + } + }); + */ + + self.save_cluster_layout().await?; + } + + Ok(SystemRpc::Ok) + } +} -- cgit v1.2.3 From 19ef1ec8e7fee3a6c670e6e35dfcc83f0801e604 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 9 Nov 2023 13:34:14 +0100 Subject: layout: more refactoring --- src/rpc/layout/manager.rs | 116 ++++++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 39 deletions(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index a8a77139..351e0959 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -1,6 +1,8 @@ use std::sync::Arc; use std::time::Duration; +use serde::{Deserialize, Serialize}; + use tokio::sync::watch; use tokio::sync::Mutex; @@ -28,6 +30,16 @@ pub struct LayoutManager { system_endpoint: Arc>, } +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct LayoutStatus { + /// Cluster layout version + pub cluster_layout_version: u64, + /// Hash of cluster layout update trackers + // (TODO) pub cluster_layout_trackers_hash: Hash, + /// Hash of cluster layout staging data + pub cluster_layout_staging_hash: Hash, +} + impl LayoutManager { pub fn new( config: &Config, @@ -35,7 +47,7 @@ impl LayoutManager { system_endpoint: Arc>, fullmesh: Arc, replication_factor: usize, - ) -> Result { + ) -> Result, Error> { let persist_cluster_layout: Persister = Persister::new(&config.metadata_dir, "cluster_layout"); @@ -68,28 +80,39 @@ impl LayoutManager { config.rpc_timeout_msec.map(Duration::from_millis), ); - Ok(Self { + Ok(Arc::new(Self { replication_factor, persist_cluster_layout, layout_watch, update_layout: Mutex::new(update_layout), system_endpoint, rpc_helper, - }) + })) } // ---- PUBLIC INTERFACE ---- - pub async fn update_cluster_layout(&self, layout: &LayoutHistory) -> Result<(), Error> { + pub fn status(&self) -> LayoutStatus { + let layout = self.layout(); + LayoutStatus { + cluster_layout_version: layout.current().version, + cluster_layout_staging_hash: layout.staging_hash, + } + } + + pub async fn update_cluster_layout( + self: &Arc, + layout: &LayoutHistory, + ) -> Result<(), Error> { self.handle_advertise_cluster_layout(layout).await?; Ok(()) } - pub fn history(&self) -> watch::Ref> { + pub fn layout(&self) -> watch::Ref> { self.layout_watch.borrow() } - pub(crate) async fn pull_cluster_layout(&self, peer: Uuid) { + pub(crate) async fn pull_cluster_layout(self: &Arc, peer: Uuid) { let resp = self .rpc_helper .call( @@ -118,13 +141,25 @@ impl LayoutManager { // 
---- RPC HANDLERS ---- + pub(crate) fn handle_advertise_status(self: &Arc, from: Uuid, status: &LayoutStatus) { + let local_status = self.status(); + if status.cluster_layout_version > local_status.cluster_layout_version + || status.cluster_layout_staging_hash != local_status.cluster_layout_staging_hash + { + tokio::spawn({ + let this = self.clone(); + async move { this.pull_cluster_layout(from).await } + }); + } + } + pub(crate) fn handle_pull_cluster_layout(&self) -> SystemRpc { - let layout = self.layout_watch.borrow().as_ref().clone(); + let layout = self.layout_watch.borrow().clone(); SystemRpc::AdvertiseClusterLayout(layout) } pub(crate) async fn handle_advertise_cluster_layout( - &self, + self: &Arc, adv: &LayoutHistory, ) -> Result { if adv.current().replication_factor != self.replication_factor { @@ -137,39 +172,42 @@ impl LayoutManager { return Err(Error::Message(msg)); } - let update_layout = self.update_layout.lock().await; - // TODO: don't clone each time an AdvertiseClusterLayout is received - let mut layout: LayoutHistory = self.layout_watch.borrow().as_ref().clone(); - - let prev_layout_check = layout.check().is_ok(); - if layout.merge(adv) { - if prev_layout_check && layout.check().is_err() { - error!("New cluster layout is invalid, discarding."); - return Err(Error::Message( - "New cluster layout is invalid, discarding.".into(), - )); - } - - update_layout.send(Arc::new(layout.clone()))?; - drop(update_layout); - - /* TODO - tokio::spawn(async move { - if let Err(e) = system - .rpc_helper() - .broadcast( - &system.system_endpoint, - SystemRpc::AdvertiseClusterLayout(layout), - RequestStrategy::with_priority(PRIO_HIGH), - ) - .await - { - warn!("Error while broadcasting new cluster layout: {}", e); + if *adv != **self.layout_watch.borrow() { + let update_layout = self.update_layout.lock().await; + let mut layout: LayoutHistory = self.layout_watch.borrow().as_ref().clone(); + + let prev_layout_check = layout.check().is_ok(); + if layout.merge(adv) { + if prev_layout_check && layout.check().is_err() { + error!("New cluster layout is invalid, discarding."); + return Err(Error::Message( + "New cluster layout is invalid, discarding.".into(), + )); } - }); - */ - self.save_cluster_layout().await?; + let layout = Arc::new(layout); + update_layout.send(layout.clone())?; + drop(update_layout); // release mutex + + tokio::spawn({ + let this = self.clone(); + async move { + if let Err(e) = this + .rpc_helper + .broadcast( + &this.system_endpoint, + SystemRpc::AdvertiseClusterLayout(layout), + RequestStrategy::with_priority(PRIO_HIGH), + ) + .await + { + warn!("Error while broadcasting new cluster layout: {}", e); + } + } + }); + + self.save_cluster_layout().await?; + } } Ok(SystemRpc::Ok) -- cgit v1.2.3 From bfb1845fdc981a370539d641a5d80f438f184f07 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 9 Nov 2023 14:12:05 +0100 Subject: layout: refactor to use a RwLock on LayoutHistory --- src/rpc/layout/manager.rs | 93 ++++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 46 deletions(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index 351e0959..c021039b 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -1,10 +1,9 @@ -use std::sync::Arc; +use std::sync::{Arc, RwLock, RwLockReadGuard}; use std::time::Duration; use serde::{Deserialize, Serialize}; -use tokio::sync::watch; -use tokio::sync::Mutex; +use tokio::sync::Notify; use netapp::endpoint::Endpoint; use 
netapp::peering::fullmesh::FullMeshPeeringStrategy; @@ -23,8 +22,8 @@ pub struct LayoutManager { replication_factor: usize, persist_cluster_layout: Persister, - pub layout_watch: watch::Receiver>, - update_layout: Mutex>>, + layout: Arc>, + pub(crate) change_notify: Arc, pub(crate) rpc_helper: RpcHelper, system_endpoint: Arc>, @@ -71,20 +70,21 @@ impl LayoutManager { } }; - let (update_layout, layout_watch) = watch::channel(Arc::new(cluster_layout)); + let layout = Arc::new(RwLock::new(cluster_layout)); + let change_notify = Arc::new(Notify::new()); let rpc_helper = RpcHelper::new( node_id.into(), fullmesh, - layout_watch.clone(), + layout.clone(), config.rpc_timeout_msec.map(Duration::from_millis), ); Ok(Arc::new(Self { replication_factor, persist_cluster_layout, - layout_watch, - update_layout: Mutex::new(update_layout), + layout, + change_notify, system_endpoint, rpc_helper, })) @@ -108,8 +108,8 @@ impl LayoutManager { Ok(()) } - pub fn layout(&self) -> watch::Ref> { - self.layout_watch.borrow() + pub fn layout(&self) -> RwLockReadGuard<'_, LayoutHistory> { + self.layout.read().unwrap() } pub(crate) async fn pull_cluster_layout(self: &Arc, peer: Uuid) { @@ -131,7 +131,7 @@ impl LayoutManager { /// Save network configuration to disc async fn save_cluster_layout(&self) -> Result<(), Error> { - let layout: Arc = self.layout_watch.borrow().clone(); + let layout = self.layout.read().unwrap().clone(); // TODO: avoid cloning self.persist_cluster_layout .save_async(&layout) .await @@ -139,6 +139,22 @@ impl LayoutManager { Ok(()) } + fn merge_layout(&self, adv: &LayoutHistory) -> Option { + let mut layout = self.layout.write().unwrap(); + let prev_layout_check = layout.check().is_ok(); + + if !prev_layout_check || adv.check().is_ok() { + if layout.merge(adv) { + if prev_layout_check && layout.check().is_err() { + panic!("Merged two correct layouts and got an incorrect layout."); + } + + return Some(layout.clone()); + } + } + None + } + // ---- RPC HANDLERS ---- pub(crate) fn handle_advertise_status(self: &Arc, from: Uuid, status: &LayoutStatus) { @@ -154,7 +170,7 @@ impl LayoutManager { } pub(crate) fn handle_pull_cluster_layout(&self) -> SystemRpc { - let layout = self.layout_watch.borrow().clone(); + let layout = self.layout.read().unwrap().clone(); // TODO: avoid cloning SystemRpc::AdvertiseClusterLayout(layout) } @@ -172,42 +188,27 @@ impl LayoutManager { return Err(Error::Message(msg)); } - if *adv != **self.layout_watch.borrow() { - let update_layout = self.update_layout.lock().await; - let mut layout: LayoutHistory = self.layout_watch.borrow().as_ref().clone(); + if let Some(new_layout) = self.merge_layout(adv) { + self.change_notify.notify_waiters(); - let prev_layout_check = layout.check().is_ok(); - if layout.merge(adv) { - if prev_layout_check && layout.check().is_err() { - error!("New cluster layout is invalid, discarding."); - return Err(Error::Message( - "New cluster layout is invalid, discarding.".into(), - )); - } - - let layout = Arc::new(layout); - update_layout.send(layout.clone())?; - drop(update_layout); // release mutex - - tokio::spawn({ - let this = self.clone(); - async move { - if let Err(e) = this - .rpc_helper - .broadcast( - &this.system_endpoint, - SystemRpc::AdvertiseClusterLayout(layout), - RequestStrategy::with_priority(PRIO_HIGH), - ) - .await - { - warn!("Error while broadcasting new cluster layout: {}", e); - } + tokio::spawn({ + let this = self.clone(); + async move { + if let Err(e) = this + .rpc_helper + .broadcast( + &this.system_endpoint, + 
SystemRpc::AdvertiseClusterLayout(new_layout), + RequestStrategy::with_priority(PRIO_HIGH), + ) + .await + { + warn!("Error while broadcasting new cluster layout: {}", e); } - }); + } + }); - self.save_cluster_layout().await?; - } + self.save_cluster_layout().await?; } Ok(SystemRpc::Ok) -- cgit v1.2.3 From 94caf9c0c1342ce1d2ba3ac7af39fb133721ee83 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 9 Nov 2023 14:53:34 +0100 Subject: layout: separate code path for synchronizing update trackers only --- src/rpc/layout/manager.rs | 140 +++++++++++++++++++++++++++++++++------------- 1 file changed, 100 insertions(+), 40 deletions(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index c021039b..a2502f58 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -19,6 +19,7 @@ use crate::rpc_helper::*; use crate::system::*; pub struct LayoutManager { + node_id: Uuid, replication_factor: usize, persist_cluster_layout: Persister, @@ -34,7 +35,7 @@ pub struct LayoutStatus { /// Cluster layout version pub cluster_layout_version: u64, /// Hash of cluster layout update trackers - // (TODO) pub cluster_layout_trackers_hash: Hash, + pub cluster_layout_trackers_hash: Hash, /// Hash of cluster layout staging data pub cluster_layout_staging_hash: Hash, } @@ -81,6 +82,7 @@ impl LayoutManager { ); Ok(Arc::new(Self { + node_id: node_id.into(), replication_factor, persist_cluster_layout, layout, @@ -92,10 +94,15 @@ impl LayoutManager { // ---- PUBLIC INTERFACE ---- + pub fn layout(&self) -> RwLockReadGuard<'_, LayoutHistory> { + self.layout.read().unwrap() + } + pub fn status(&self) -> LayoutStatus { let layout = self.layout(); LayoutStatus { cluster_layout_version: layout.current().version, + cluster_layout_trackers_hash: layout.trackers_hash, cluster_layout_staging_hash: layout.staging_hash, } } @@ -108,11 +115,35 @@ impl LayoutManager { Ok(()) } - pub fn layout(&self) -> RwLockReadGuard<'_, LayoutHistory> { - self.layout.read().unwrap() + // ---- INTERNALS --- + + fn merge_layout(&self, adv: &LayoutHistory) -> Option { + let mut layout = self.layout.write().unwrap(); + let prev_layout_check = layout.check().is_ok(); + + if !prev_layout_check || adv.check().is_ok() { + if layout.merge(adv) { + if prev_layout_check && layout.check().is_err() { + panic!("Merged two correct layouts and got an incorrect layout."); + } + + return Some(layout.clone()); + } + } + None } - pub(crate) async fn pull_cluster_layout(self: &Arc, peer: Uuid) { + fn merge_layout_trackers(&self, adv: &UpdateTrackers) -> Option { + let mut layout = self.layout.write().unwrap(); + if layout.update_trackers != *adv { + if layout.update_trackers.merge(adv) { + return Some(layout.update_trackers.clone()); + } + } + None + } + + async fn pull_cluster_layout(self: &Arc, peer: Uuid) { let resp = self .rpc_helper .call( @@ -123,15 +154,35 @@ impl LayoutManager { ) .await; if let Ok(SystemRpc::AdvertiseClusterLayout(layout)) = resp { - let _: Result<_, _> = self.handle_advertise_cluster_layout(&layout).await; + if let Err(e) = self.handle_advertise_cluster_layout(&layout).await { + warn!("In pull_cluster_layout: {}", e); + } } } - // ---- INTERNALS --- + async fn pull_cluster_layout_trackers(self: &Arc, peer: Uuid) { + let resp = self + .rpc_helper + .call( + &self.system_endpoint, + peer, + SystemRpc::PullClusterLayoutTrackers, + RequestStrategy::with_priority(PRIO_HIGH), + ) + .await; + if let Ok(SystemRpc::AdvertiseClusterLayoutTrackers(trackers)) = resp { + if let 
Err(e) = self + .handle_advertise_cluster_layout_trackers(&trackers) + .await + { + warn!("In pull_cluster_layout_trackers: {}", e); + } + } + } - /// Save network configuration to disc + /// Save cluster layout data to disk async fn save_cluster_layout(&self) -> Result<(), Error> { - let layout = self.layout.read().unwrap().clone(); // TODO: avoid cloning + let layout = self.layout.read().unwrap().clone(); self.persist_cluster_layout .save_async(&layout) .await @@ -139,33 +190,41 @@ impl LayoutManager { Ok(()) } - fn merge_layout(&self, adv: &LayoutHistory) -> Option { - let mut layout = self.layout.write().unwrap(); - let prev_layout_check = layout.check().is_ok(); - - if !prev_layout_check || adv.check().is_ok() { - if layout.merge(adv) { - if prev_layout_check && layout.check().is_err() { - panic!("Merged two correct layouts and got an incorrect layout."); + fn broadcast_update(self: &Arc, rpc: SystemRpc) { + tokio::spawn({ + let this = self.clone(); + async move { + if let Err(e) = this + .rpc_helper + .broadcast( + &this.system_endpoint, + rpc, + RequestStrategy::with_priority(PRIO_HIGH), + ) + .await + { + warn!("Error while broadcasting new cluster layout: {}", e); } - - return Some(layout.clone()); } - } - None + }); } // ---- RPC HANDLERS ---- - pub(crate) fn handle_advertise_status(self: &Arc, from: Uuid, status: &LayoutStatus) { - let local_status = self.status(); - if status.cluster_layout_version > local_status.cluster_layout_version - || status.cluster_layout_staging_hash != local_status.cluster_layout_staging_hash + pub(crate) fn handle_advertise_status(self: &Arc, from: Uuid, remote: &LayoutStatus) { + let local = self.status(); + if remote.cluster_layout_version > local.cluster_layout_version + || remote.cluster_layout_staging_hash != local.cluster_layout_staging_hash { tokio::spawn({ let this = self.clone(); async move { this.pull_cluster_layout(from).await } }); + } else if remote.cluster_layout_trackers_hash != local.cluster_layout_trackers_hash { + tokio::spawn({ + let this = self.clone(); + async move { this.pull_cluster_layout_trackers(from).await } + }); } } @@ -174,6 +233,11 @@ impl LayoutManager { SystemRpc::AdvertiseClusterLayout(layout) } + pub(crate) fn handle_pull_cluster_layout_trackers(&self) -> SystemRpc { + let layout = self.layout.read().unwrap(); + SystemRpc::AdvertiseClusterLayoutTrackers(layout.update_trackers.clone()) + } + pub(crate) async fn handle_advertise_cluster_layout( self: &Arc, adv: &LayoutHistory, @@ -190,24 +254,20 @@ impl LayoutManager { if let Some(new_layout) = self.merge_layout(adv) { self.change_notify.notify_waiters(); + self.broadcast_update(SystemRpc::AdvertiseClusterLayout(new_layout)); + self.save_cluster_layout().await?; + } - tokio::spawn({ - let this = self.clone(); - async move { - if let Err(e) = this - .rpc_helper - .broadcast( - &this.system_endpoint, - SystemRpc::AdvertiseClusterLayout(new_layout), - RequestStrategy::with_priority(PRIO_HIGH), - ) - .await - { - warn!("Error while broadcasting new cluster layout: {}", e); - } - } - }); + Ok(SystemRpc::Ok) + } + pub(crate) async fn handle_advertise_cluster_layout_trackers( + self: &Arc, + trackers: &UpdateTrackers, + ) -> Result { + if let Some(new_trackers) = self.merge_layout_trackers(trackers) { + self.change_notify.notify_waiters(); + self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers(new_trackers)); self.save_cluster_layout().await?; } -- cgit v1.2.3 From 03ebf18830dff1983f09abe6ecb8d8d26daeb446 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 9 Nov 
2023 15:31:59 +0100 Subject: layout: begin managing the update tracker values --- src/rpc/layout/manager.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index a2502f58..ffcc938b 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -51,7 +51,7 @@ impl LayoutManager { let persist_cluster_layout: Persister = Persister::new(&config.metadata_dir, "cluster_layout"); - let cluster_layout = match persist_cluster_layout.load() { + let mut cluster_layout = match persist_cluster_layout.load() { Ok(x) => { if x.current().replication_factor != replication_factor { return Err(Error::Message(format!( @@ -71,6 +71,8 @@ impl LayoutManager { } }; + cluster_layout.update_trackers(node_id.into()); + let layout = Arc::new(RwLock::new(cluster_layout)); let change_notify = Arc::new(Notify::new()); @@ -126,7 +128,7 @@ impl LayoutManager { if prev_layout_check && layout.check().is_err() { panic!("Merged two correct layouts and got an incorrect layout."); } - + layout.update_trackers(self.node_id); return Some(layout.clone()); } } @@ -137,6 +139,7 @@ impl LayoutManager { let mut layout = self.layout.write().unwrap(); if layout.update_trackers != *adv { if layout.update_trackers.merge(adv) { + layout.update_trackers(self.node_id); return Some(layout.update_trackers.clone()); } } -- cgit v1.2.3 From 9d95f6f7040c1899715ae4f984313427b1432758 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 9 Nov 2023 15:52:45 +0100 Subject: layout: fix tracker bugs --- src/rpc/layout/manager.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index ffcc938b..c1417dac 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -125,10 +125,10 @@ impl LayoutManager { if !prev_layout_check || adv.check().is_ok() { if layout.merge(adv) { + layout.update_trackers(self.node_id); if prev_layout_check && layout.check().is_err() { panic!("Merged two correct layouts and got an incorrect layout."); } - layout.update_trackers(self.node_id); return Some(layout.clone()); } } @@ -245,6 +245,8 @@ impl LayoutManager { self: &Arc, adv: &LayoutHistory, ) -> Result { + debug!("handle_advertise_cluster_layout: {:?}", adv); + if adv.current().replication_factor != self.replication_factor { let msg = format!( "Received a cluster layout from another node with replication factor {}, which is different from what we have in our configuration ({}). 
Discarding the cluster layout we received.", @@ -256,6 +258,8 @@ impl LayoutManager { } if let Some(new_layout) = self.merge_layout(adv) { + debug!("handle_advertise_cluster_layout: some changes were added to the current stuff"); + self.change_notify.notify_waiters(); self.broadcast_update(SystemRpc::AdvertiseClusterLayout(new_layout)); self.save_cluster_layout().await?; @@ -268,6 +272,8 @@ impl LayoutManager { self: &Arc, trackers: &UpdateTrackers, ) -> Result { + debug!("handle_advertise_cluster_layout_trackers: {:?}", trackers); + if let Some(new_trackers) = self.merge_layout_trackers(trackers) { self.change_notify.notify_waiters(); self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers(new_trackers)); -- cgit v1.2.3 From df36cf3099f6010c4fc62109b85d4d1e62f160cc Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 9 Nov 2023 16:32:31 +0100 Subject: layout: add helpers to LayoutHistory and prepare integration with Table --- src/rpc/layout/manager.rs | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index c1417dac..b0302b12 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -1,4 +1,5 @@ -use std::sync::{Arc, RwLock, RwLockReadGuard}; +use std::collections::HashMap; +use std::sync::{Arc, Mutex, RwLock, RwLockReadGuard}; use std::time::Duration; use serde::{Deserialize, Serialize}; @@ -26,6 +27,8 @@ pub struct LayoutManager { layout: Arc>, pub(crate) change_notify: Arc, + table_sync_version: Mutex>, + pub(crate) rpc_helper: RpcHelper, system_endpoint: Arc>, } @@ -117,6 +120,34 @@ impl LayoutManager { Ok(()) } + pub fn add_table(&self, table_name: &'static str) { + let first_version = self.layout().versions.first().unwrap().version; + + self.table_sync_version + .lock() + .unwrap() + .insert(table_name.to_string(), first_version); + } + + pub fn sync_table_until(self: &Arc, table_name: &'static str, version: u64) { + let mut table_sync_version = self.table_sync_version.lock().unwrap(); + *table_sync_version.get_mut(table_name).unwrap() = version; + let sync_until = table_sync_version.iter().map(|(_, v)| *v).max().unwrap(); + drop(table_sync_version); + + let mut layout = self.layout.write().unwrap(); + if layout + .update_trackers + .sync_map + .set_max(self.node_id, sync_until) + { + layout.update_hashes(); + self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers( + layout.update_trackers.clone(), + )); + } + } + // ---- INTERNALS --- fn merge_layout(&self, adv: &LayoutHistory) -> Option { -- cgit v1.2.3 From ce89d1ddabe3b9e638b0173949726522ae9a0311 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sat, 11 Nov 2023 12:08:32 +0100 Subject: table sync: adapt to new layout history --- src/rpc/layout/manager.rs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index b0302b12..7d60bae6 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -92,6 +92,7 @@ impl LayoutManager { persist_cluster_layout, layout, change_notify, + table_sync_version: Mutex::new(HashMap::new()), system_endpoint, rpc_helper, })) -- cgit v1.2.3 From df24bb806d64d5d5e748c35efe3f49ad3dda709e Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sat, 11 Nov 2023 12:37:33 +0100 Subject: layout/sync: fix bugs and add tracing --- src/rpc/layout/manager.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 
'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index 7d60bae6..ce8b6f61 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -133,7 +133,7 @@ impl LayoutManager { pub fn sync_table_until(self: &Arc, table_name: &'static str, version: u64) { let mut table_sync_version = self.table_sync_version.lock().unwrap(); *table_sync_version.get_mut(table_name).unwrap() = version; - let sync_until = table_sync_version.iter().map(|(_, v)| *v).max().unwrap(); + let sync_until = table_sync_version.iter().map(|(_, v)| *v).min().unwrap(); drop(table_sync_version); let mut layout = self.layout.write().unwrap(); @@ -142,6 +142,7 @@ impl LayoutManager { .sync_map .set_max(self.node_id, sync_until) { + debug!("sync_until updated to {}", sync_until); layout.update_hashes(); self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers( layout.update_trackers.clone(), @@ -277,7 +278,12 @@ impl LayoutManager { self: &Arc, adv: &LayoutHistory, ) -> Result { - debug!("handle_advertise_cluster_layout: {:?}", adv); + debug!( + "handle_advertise_cluster_layout: {} versions, last={}, trackers={:?}", + adv.versions.len(), + adv.current().version, + adv.update_trackers + ); if adv.current().replication_factor != self.replication_factor { let msg = format!( -- cgit v1.2.3 From 65066c70640371cc318faddfb4c05c96de18e86d Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 15 Nov 2023 13:28:30 +0100 Subject: layout: wip cache global mins --- src/rpc/layout/manager.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index ce8b6f61..21ec2d8d 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -74,7 +74,7 @@ impl LayoutManager { } }; - cluster_layout.update_trackers(node_id.into()); + cluster_layout.update_trackers_of(node_id.into()); let layout = Arc::new(RwLock::new(cluster_layout)); let change_notify = Arc::new(Notify::new()); @@ -158,7 +158,7 @@ impl LayoutManager { if !prev_layout_check || adv.check().is_ok() { if layout.merge(adv) { - layout.update_trackers(self.node_id); + layout.update_trackers_of(self.node_id); if prev_layout_check && layout.check().is_err() { panic!("Merged two correct layouts and got an incorrect layout."); } @@ -172,7 +172,7 @@ impl LayoutManager { let mut layout = self.layout.write().unwrap(); if layout.update_trackers != *adv { if layout.update_trackers.merge(adv) { - layout.update_trackers(self.node_id); + layout.update_trackers_of(self.node_id); return Some(layout.update_trackers.clone()); } } -- cgit v1.2.3 From 393c4d4515e0cdadadc8de8ae2df12e4371cff88 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 15 Nov 2023 14:20:50 +0100 Subject: layout: add helper for cached/external values to centralize recomputation --- src/rpc/layout/manager.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index 21ec2d8d..e270ad21 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -24,7 +24,7 @@ pub struct LayoutManager { replication_factor: usize, persist_cluster_layout: Persister, - layout: Arc>, + layout: Arc>, pub(crate) change_notify: Arc, table_sync_version: Mutex>, @@ -54,7 +54,7 @@ impl LayoutManager { let persist_cluster_layout: Persister = Persister::new(&config.metadata_dir, "cluster_layout"); - let mut cluster_layout = match 
persist_cluster_layout.load() { + let cluster_layout = match persist_cluster_layout.load() { Ok(x) => { if x.current().replication_factor != replication_factor { return Err(Error::Message(format!( @@ -74,6 +74,7 @@ impl LayoutManager { } }; + let mut cluster_layout = LayoutHelper::new(cluster_layout); cluster_layout.update_trackers_of(node_id.into()); let layout = Arc::new(RwLock::new(cluster_layout)); @@ -100,7 +101,7 @@ impl LayoutManager { // ---- PUBLIC INTERFACE ---- - pub fn layout(&self) -> RwLockReadGuard<'_, LayoutHistory> { + pub fn layout(&self) -> RwLockReadGuard<'_, LayoutHelper> { self.layout.read().unwrap() } @@ -108,8 +109,8 @@ impl LayoutManager { let layout = self.layout(); LayoutStatus { cluster_layout_version: layout.current().version, - cluster_layout_trackers_hash: layout.trackers_hash, - cluster_layout_staging_hash: layout.staging_hash, + cluster_layout_trackers_hash: layout.trackers_hash(), + cluster_layout_staging_hash: layout.staging_hash(), } } @@ -137,13 +138,8 @@ impl LayoutManager { drop(table_sync_version); let mut layout = self.layout.write().unwrap(); - if layout - .update_trackers - .sync_map - .set_max(self.node_id, sync_until) - { + if layout.update(|l| l.update_trackers.sync_map.set_max(self.node_id, sync_until)) { debug!("sync_until updated to {}", sync_until); - layout.update_hashes(); self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers( layout.update_trackers.clone(), )); @@ -157,7 +153,7 @@ impl LayoutManager { let prev_layout_check = layout.check().is_ok(); if !prev_layout_check || adv.check().is_ok() { - if layout.merge(adv) { + if layout.update(|l| l.merge(adv)) { layout.update_trackers_of(self.node_id); if prev_layout_check && layout.check().is_err() { panic!("Merged two correct layouts and got an incorrect layout."); @@ -171,7 +167,7 @@ impl LayoutManager { fn merge_layout_trackers(&self, adv: &UpdateTrackers) -> Option { let mut layout = self.layout.write().unwrap(); if layout.update_trackers != *adv { - if layout.update_trackers.merge(adv) { + if layout.update(|l| l.update_trackers.merge(adv)) { layout.update_trackers_of(self.node_id); return Some(layout.update_trackers.clone()); } -- cgit v1.2.3 From 33c8a489b0a9c0e869282bfc19c548f5a3e02e8c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 15 Nov 2023 15:40:44 +0100 Subject: layou: implement ack locking --- src/rpc/layout/manager.rs | 74 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 6 deletions(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index e270ad21..4e073d1f 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -1,5 +1,5 @@ use std::collections::HashMap; -use std::sync::{Arc, Mutex, RwLock, RwLockReadGuard}; +use std::sync::{atomic::Ordering, Arc, Mutex, RwLock, RwLockReadGuard}; use std::time::Duration; use serde::{Deserialize, Serialize}; @@ -74,8 +74,8 @@ impl LayoutManager { } }; - let mut cluster_layout = LayoutHelper::new(cluster_layout); - cluster_layout.update_trackers_of(node_id.into()); + let mut cluster_layout = LayoutHelper::new(cluster_layout, Default::default()); + cluster_layout.update_trackers(node_id.into()); let layout = Arc::new(RwLock::new(cluster_layout)); let change_notify = Arc::new(Notify::new()); @@ -139,13 +139,36 @@ impl LayoutManager { let mut layout = self.layout.write().unwrap(); if layout.update(|l| l.update_trackers.sync_map.set_max(self.node_id, sync_until)) { - debug!("sync_until updated to {}", sync_until); + 
info!("sync_until updated to {}", sync_until); self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers( layout.update_trackers.clone(), )); } } + fn ack_new_version(self: &Arc) { + let mut layout = self.layout.write().unwrap(); + if layout.ack_max_free(self.node_id) { + self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers( + layout.update_trackers.clone(), + )); + } + } + + // ---- ACK LOCKING ---- + + pub fn write_sets_of(self: &Arc, position: &Hash) -> WriteLock>> { + let layout = self.layout(); + let version = layout.current().version; + let nodes = layout.write_sets_of(position); + layout + .ack_lock + .get(&version) + .unwrap() + .fetch_add(1, Ordering::Relaxed); + WriteLock::new(version, self, nodes) + } + // ---- INTERNALS --- fn merge_layout(&self, adv: &LayoutHistory) -> Option { @@ -154,7 +177,7 @@ impl LayoutManager { if !prev_layout_check || adv.check().is_ok() { if layout.update(|l| l.merge(adv)) { - layout.update_trackers_of(self.node_id); + layout.update_trackers(self.node_id); if prev_layout_check && layout.check().is_err() { panic!("Merged two correct layouts and got an incorrect layout."); } @@ -168,7 +191,7 @@ impl LayoutManager { let mut layout = self.layout.write().unwrap(); if layout.update_trackers != *adv { if layout.update(|l| l.update_trackers.merge(adv)) { - layout.update_trackers_of(self.node_id); + layout.update_trackers(self.node_id); return Some(layout.update_trackers.clone()); } } @@ -317,3 +340,42 @@ impl LayoutManager { Ok(SystemRpc::Ok) } } + +// ---- ack lock ---- + +pub struct WriteLock { + layout_version: u64, + layout_manager: Arc, + value: T, +} + +impl WriteLock { + fn new(version: u64, layout_manager: &Arc, value: T) -> Self { + Self { + layout_version: version, + layout_manager: layout_manager.clone(), + value, + } + } +} + +impl AsRef for WriteLock { + fn as_ref(&self) -> &T { + &self.value + } +} + +impl Drop for WriteLock { + fn drop(&mut self) { + let layout = self.layout_manager.layout(); // acquire read lock + if let Some(counter) = layout.ack_lock.get(&self.layout_version) { + let prev_lock = counter.fetch_sub(1, Ordering::Relaxed); + if prev_lock == 1 && layout.current().version > self.layout_version { + drop(layout); // release read lock, write lock will be acquired + self.layout_manager.ack_new_version(); + } + } else { + error!("Could not find ack lock counter for layout version {}. 
This probably indicates a bug in Garage.", self.layout_version); + } + } +} -- cgit v1.2.3 From ad5c6f779f7fdfdc0569920c830c59197023515a Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 16 Nov 2023 13:26:43 +0100 Subject: layout: split helper in separate file; more precise difference tracking --- src/rpc/layout/manager.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index 4e073d1f..85d94ffa 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -184,17 +184,20 @@ impl LayoutManager { return Some(layout.clone()); } } + None } fn merge_layout_trackers(&self, adv: &UpdateTrackers) -> Option { let mut layout = self.layout.write().unwrap(); + if layout.update_trackers != *adv { if layout.update(|l| l.update_trackers.merge(adv)) { layout.update_trackers(self.node_id); return Some(layout.update_trackers.clone()); } } + None } @@ -284,7 +287,7 @@ impl LayoutManager { } pub(crate) fn handle_pull_cluster_layout(&self) -> SystemRpc { - let layout = self.layout.read().unwrap().clone(); // TODO: avoid cloning + let layout = self.layout.read().unwrap().clone(); SystemRpc::AdvertiseClusterLayout(layout) } -- cgit v1.2.3 From 707442f5de416fdbed4681a33b739f0a787b7834 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 16 Nov 2023 13:51:40 +0100 Subject: layout: refactor digests and add "!=" assertions before epidemic bcast --- src/rpc/layout/manager.rs | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index 85d94ffa..c65831a2 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -2,8 +2,6 @@ use std::collections::HashMap; use std::sync::{atomic::Ordering, Arc, Mutex, RwLock, RwLockReadGuard}; use std::time::Duration; -use serde::{Deserialize, Serialize}; - use tokio::sync::Notify; use netapp::endpoint::Endpoint; @@ -33,16 +31,6 @@ pub struct LayoutManager { system_endpoint: Arc>, } -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -pub struct LayoutStatus { - /// Cluster layout version - pub cluster_layout_version: u64, - /// Hash of cluster layout update trackers - pub cluster_layout_trackers_hash: Hash, - /// Hash of cluster layout staging data - pub cluster_layout_staging_hash: Hash, -} - impl LayoutManager { pub fn new( config: &Config, @@ -105,15 +93,6 @@ impl LayoutManager { self.layout.read().unwrap() } - pub fn status(&self) -> LayoutStatus { - let layout = self.layout(); - LayoutStatus { - cluster_layout_version: layout.current().version, - cluster_layout_trackers_hash: layout.trackers_hash(), - cluster_layout_staging_hash: layout.staging_hash(), - } - } - pub async fn update_cluster_layout( self: &Arc, layout: &LayoutHistory, @@ -173,6 +152,7 @@ impl LayoutManager { fn merge_layout(&self, adv: &LayoutHistory) -> Option { let mut layout = self.layout.write().unwrap(); + let prev_digest = layout.digest(); let prev_layout_check = layout.check().is_ok(); if !prev_layout_check || adv.check().is_ok() { @@ -181,6 +161,7 @@ impl LayoutManager { if prev_layout_check && layout.check().is_err() { panic!("Merged two correct layouts and got an incorrect layout."); } + assert!(layout.digest() != prev_digest); return Some(layout.clone()); } } @@ -190,10 +171,12 @@ impl LayoutManager { fn merge_layout_trackers(&self, adv: &UpdateTrackers) -> Option { let mut layout = 
self.layout.write().unwrap(); + let prev_digest = layout.digest(); if layout.update_trackers != *adv { if layout.update(|l| l.update_trackers.merge(adv)) { layout.update_trackers(self.node_id); + assert!(layout.digest() != prev_digest); return Some(layout.update_trackers.clone()); } } @@ -269,16 +252,17 @@ impl LayoutManager { // ---- RPC HANDLERS ---- - pub(crate) fn handle_advertise_status(self: &Arc, from: Uuid, remote: &LayoutStatus) { - let local = self.status(); - if remote.cluster_layout_version > local.cluster_layout_version - || remote.cluster_layout_staging_hash != local.cluster_layout_staging_hash + pub(crate) fn handle_advertise_status(self: &Arc, from: Uuid, remote: &LayoutDigest) { + let local = self.layout().digest(); + if remote.current_version > local.current_version + || remote.active_versions != local.active_versions + || remote.staging_hash != local.staging_hash { tokio::spawn({ let this = self.clone(); async move { this.pull_cluster_layout(from).await } }); - } else if remote.cluster_layout_trackers_hash != local.cluster_layout_trackers_hash { + } else if remote.trackers_hash != local.trackers_hash { tokio::spawn({ let this = self.clone(); async move { this.pull_cluster_layout_trackers(from).await } -- cgit v1.2.3 From 95eb13eb08d517d328e3c8aeb222440a27211ee9 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 7 Dec 2023 10:55:15 +0100 Subject: rpc: refactor result tracking for quorum sets --- src/rpc/layout/manager.rs | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index c65831a2..17465019 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -352,6 +352,12 @@ impl AsRef for WriteLock { } } +impl AsMut for WriteLock { + fn as_mut(&mut self) -> &mut T { + &mut self.value + } +} + impl Drop for WriteLock { fn drop(&mut self) { let layout = self.layout_manager.layout(); // acquire read lock -- cgit v1.2.3 From d90de365b3b30cb631b22fcd62c98bddb5a91549 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 7 Dec 2023 11:16:10 +0100 Subject: table sync: use write quorums to report global success or failure of sync --- src/rpc/layout/manager.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index 17465019..dc963ba0 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -139,7 +139,7 @@ impl LayoutManager { pub fn write_sets_of(self: &Arc, position: &Hash) -> WriteLock>> { let layout = self.layout(); let version = layout.current().version; - let nodes = layout.write_sets_of(position); + let nodes = layout.storage_sets_of(position); layout .ack_lock .get(&version) -- cgit v1.2.3 From 9cecea64d4509e95ac9793b29c947e2ecf9bb0b8 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 7 Dec 2023 14:27:53 +0100 Subject: layout: allow sync update tracker to progress with only quorums --- src/rpc/layout/manager.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index dc963ba0..ec8a2a15 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -14,12 +14,13 @@ use garage_util::error::*; use garage_util::persister::Persister; use super::*; +use crate::replication_mode::ReplicationMode; use crate::rpc_helper::*; use crate::system::*; pub struct LayoutManager { node_id: Uuid, - 
replication_factor: usize, + replication_mode: ReplicationMode, persist_cluster_layout: Persister, layout: Arc>, @@ -37,14 +38,16 @@ impl LayoutManager { node_id: NodeID, system_endpoint: Arc>, fullmesh: Arc, - replication_factor: usize, + replication_mode: ReplicationMode, ) -> Result, Error> { + let replication_factor = replication_mode.replication_factor(); + let persist_cluster_layout: Persister = Persister::new(&config.metadata_dir, "cluster_layout"); let cluster_layout = match persist_cluster_layout.load() { Ok(x) => { - if x.current().replication_factor != replication_factor { + if x.current().replication_factor != replication_mode.replication_factor() { return Err(Error::Message(format!( "Prevous cluster layout has replication factor {}, which is different than the one specified in the config file ({}). The previous cluster layout can be purged, if you know what you are doing, simply by deleting the `cluster_layout` file in your metadata directory.", x.current().replication_factor, @@ -62,7 +65,8 @@ impl LayoutManager { } }; - let mut cluster_layout = LayoutHelper::new(cluster_layout, Default::default()); + let mut cluster_layout = + LayoutHelper::new(replication_mode, cluster_layout, Default::default()); cluster_layout.update_trackers(node_id.into()); let layout = Arc::new(RwLock::new(cluster_layout)); @@ -77,7 +81,7 @@ impl LayoutManager { Ok(Arc::new(Self { node_id: node_id.into(), - replication_factor, + replication_mode, persist_cluster_layout, layout, change_notify, @@ -291,11 +295,11 @@ impl LayoutManager { adv.update_trackers ); - if adv.current().replication_factor != self.replication_factor { + if adv.current().replication_factor != self.replication_mode.replication_factor() { let msg = format!( "Received a cluster layout from another node with replication factor {}, which is different from what we have in our configuration ({}). Discarding the cluster layout we received.", adv.current().replication_factor, - self.replication_factor + self.replication_mode.replication_factor() ); error!("{}", msg); return Err(Error::Message(msg)); -- cgit v1.2.3 From 0041b013a473e3ae72f50209d8f79db75a72848b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 11 Dec 2023 16:09:22 +0100 Subject: layout: refactoring and fix in layout helper --- src/rpc/layout/manager.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/rpc/layout/manager.rs') diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index ec8a2a15..6747b79d 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -256,7 +256,7 @@ impl LayoutManager { // ---- RPC HANDLERS ---- - pub(crate) fn handle_advertise_status(self: &Arc, from: Uuid, remote: &LayoutDigest) { + pub(crate) fn handle_advertise_status(self: &Arc, from: Uuid, remote: &RpcLayoutDigest) { let local = self.layout().digest(); if remote.current_version > local.current_version || remote.active_versions != local.active_versions -- cgit v1.2.3
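
Taken together, the commits above introduce an "ack locking" scheme: every write operation pins the layout version it was started under by incrementing a per-version counter (the ack_lock map), and a node only advances its ack update tracker toward a newer layout version once that counter drops back to zero, at which point the tracker change is broadcast to the rest of the cluster. The sketch below models just that counting mechanism with plain standard-library types. It is a simplified illustration under stated assumptions, not Garage's actual code: the names (LayoutTracker, WriteGuard, start_write, install_new_version, try_ack_newer_version) are invented for this example, and the real implementation works on LayoutHelper/LayoutHistory, hands back the storage node sets to write to, and propagates the tracker change over RPC (AdvertiseClusterLayoutTrackers).

    use std::collections::HashMap;
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::sync::{Arc, RwLock};

    // Minimal stand-in for the layout state: the current layout version, the
    // highest version this node has acknowledged, and one in-flight write
    // counter per version (Garage keeps these counters in its layout helper).
    struct LayoutState {
        current_version: u64,
        acked_version: u64,
        ack_lock: HashMap<u64, AtomicU64>,
    }

    struct LayoutTracker {
        state: RwLock<LayoutState>,
    }

    // RAII guard: holding it means "a write pinned to `layout_version` is still
    // in flight". Dropping it releases the pin and, if it was the last write on
    // an old version, lets the node acknowledge the newer one.
    struct WriteGuard {
        layout_version: u64,
        tracker: Arc<LayoutTracker>,
    }

    impl LayoutTracker {
        fn new() -> Arc<Self> {
            let mut ack_lock = HashMap::new();
            ack_lock.insert(1, AtomicU64::new(0));
            Arc::new(Self {
                state: RwLock::new(LayoutState {
                    current_version: 1,
                    acked_version: 1,
                    ack_lock,
                }),
            })
        }

        // Same idea as LayoutManager::write_sets_of(): pin the current layout
        // version for the duration of a write operation.
        fn start_write(self: &Arc<Self>) -> WriteGuard {
            let state = self.state.read().unwrap();
            let version = state.current_version;
            state.ack_lock[&version].fetch_add(1, Ordering::Relaxed);
            WriteGuard {
                layout_version: version,
                tracker: self.clone(),
            }
        }

        // A newer layout version arrives (e.g. through gossip).
        fn install_new_version(&self, version: u64) {
            let mut state = self.state.write().unwrap();
            state
                .ack_lock
                .entry(version)
                .or_insert_with(|| AtomicU64::new(0));
            state.current_version = version;
        }

        // Acknowledge the newest version once no write is pinned to an older one.
        fn try_ack_newer_version(&self) {
            let mut state = self.state.write().unwrap();
            let current = state.current_version;
            let old_writes_done = state
                .ack_lock
                .iter()
                .filter(|(v, _)| **v < current)
                .all(|(_, c)| c.load(Ordering::Relaxed) == 0);
            if old_writes_done && state.acked_version < current {
                state.acked_version = current;
                // In Garage, this is the point where the updated ack tracker
                // would be broadcast to the other nodes.
                println!("acknowledged layout version {}", current);
            }
        }
    }

    impl Drop for WriteGuard {
        fn drop(&mut self) {
            let state = self.tracker.state.read().unwrap();
            // fetch_sub returns the value *before* the decrement.
            let prev = state.ack_lock[&self.layout_version].fetch_sub(1, Ordering::Relaxed);
            let newer_exists = state.current_version > self.layout_version;
            drop(state); // release the read lock before taking the write lock
            if prev == 1 && newer_exists {
                self.tracker.try_ack_newer_version();
            }
        }
    }

    fn main() {
        let tracker = LayoutTracker::new();
        let guard = tracker.start_write(); // write pinned to layout version 1
        tracker.install_new_version(2);    // a layout update arrives mid-write
        tracker.try_ack_newer_version();   // no ack yet: a version-1 write is in flight
        drop(guard);                       // last version-1 write completes -> version 2 acked
    }

Compared with this sketch, Garage's WriteLock<T> as shown in the diffs also carries a payload (the write sets returned for the requested hash position), and its Drop handler logs an error instead of panicking if no counter exists for its layout version.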