aboutsummaryrefslogblamecommitdiff
path: root/src/table/replication/sharded.rs
blob: 8ba3700f6c33a2d6a1f32386428a147e1a19c7d4 (plain) (tree)
1
2
3
4
5
6
7
8

                   
                          
                               

                         
                          
 





                                                                

                                    
                                               
                                
                                                        
                                      
                                                                                        
                               
                                                                                         



                                                   

                                                   



                                                                   
                                                        
                                                                




                                        

                                                              
         
                                         




                                                           
 
                                                          
                                                                         
         


                                                          
                                                          




                                                        
                                                                                       



                                                                                            
                                                     















                                                                               

         
use std::sync::Arc;

use garage_rpc::layout::*;
use garage_rpc::system::System;
use garage_util::data::*;

use crate::replication::*;

/// Sharded replication schema:
/// - based on the ring of nodes, a certain set of neighbors
///   store entries, given as a function of the position of the
///   entry's hash in the ring
/// - reads are done on all of the nodes that replicate the data
/// - writes as well
#[derive(Clone)]
pub struct TableShardedReplication {
	/// The membership manager of this node; the cluster layout and the
	/// layout manager are reached through it
	pub system: Arc<System>,
	/// How many times each piece of data should be replicated
	pub replication_factor: usize,
	/// How many nodes to contact for a read, should be at most `replication_factor`
	pub read_quorum: usize,
	/// How many nodes to contact for a write, should be at most `replication_factor`
	pub write_quorum: usize,
}

impl TableReplication for TableShardedReplication {
	/// Write sets as returned by the layout manager's `write_sets_of`.
	type WriteSets = WriteLock<Vec<Vec<Uuid>>>;

	/// Nodes that store the entry with the given hash, as reported by
	/// the cluster layout.
	fn storage_nodes(&self, hash: &Hash) -> Vec<Uuid> {
		self.system.cluster_layout().storage_nodes_of(hash)
	}

	/// Nodes to contact when reading the entry with the given hash, as
	/// reported by the cluster layout.
	fn read_nodes(&self, hash: &Hash) -> Vec<Uuid> {
		self.system.cluster_layout().read_nodes_of(hash)
	}
	/// Configured read quorum.
	fn read_quorum(&self) -> usize {
		self.read_quorum
	}

	/// Sets of nodes to contact when writing the entry with the given
	/// hash, obtained from the layout manager.
	fn write_sets(&self, hash: &Hash) -> Self::WriteSets {
		self.system.layout_manager.write_sets_of(hash)
	}
	/// Configured write quorum.
	fn write_quorum(&self) -> usize {
		self.write_quorum
	}
	/// Number of write errors that can be tolerated:
	/// `replication_factor - write_quorum`, clamped at zero.
	fn max_write_errors(&self) -> usize {
		// `write_quorum` is only documented as "should be at most
		// `replication_factor`"; a plain subtraction would panic in debug
		// builds and wrap to a huge value in release builds if that
		// expectation is violated. Saturate to 0 (no error tolerated) instead.
		self.replication_factor.saturating_sub(self.write_quorum)
	}

	/// Partition of the current layout version that contains the given hash.
	fn partition_of(&self, hash: &Hash) -> Partition {
		self.system.cluster_layout().current().partition_of(hash)
	}

	/// Builds the list of partitions to synchronize.
	///
	/// Each partition of the current layout version becomes one
	/// `SyncPartition` whose storage sets are looked up from the partition's
	/// first hash. The `last_hash` of a partition is the `first_hash` of the
	/// partition that follows it; the final partition ends at the all-`0xFF`
	/// hash. The returned `layout_version` is the layout's `ack_map_min()`.
	fn sync_partitions(&self) -> SyncPartitions {
		let layout = self.system.cluster_layout();
		let layout_version = layout.ack_map_min();

		let mut partitions = layout
			.current()
			.partitions()
			.map(|(partition, first_hash)| {
				let storage_sets = layout.storage_sets_of(&first_hash);
				SyncPartition {
					partition,
					first_hash,
					last_hash: [0u8; 32].into(), // filled in just after
					storage_sets,
				}
			})
			.collect::<Vec<_>>();

		// Walk backwards so that each partition can take the first hash of
		// its successor; the last partition gets the maximum hash value.
		let mut next_first_hash: Hash = [0xFFu8; 32].into();
		for partition in partitions.iter_mut().rev() {
			partition.last_hash = next_first_hash;
			next_first_hash = partition.first_hash;
		}

		SyncPartitions {
			layout_version,
			partitions,
		}
	}
}