diff options
Diffstat (limited to 'src/table/replication')
-rw-r--r-- | src/table/replication/fullcopy.rs | 59 | ||||
-rw-r--r-- | src/table/replication/mod.rs | 6 | ||||
-rw-r--r-- | src/table/replication/parameters.rs | 22 | ||||
-rw-r--r-- | src/table/replication/sharded.rs | 54 |
4 files changed, 141 insertions, 0 deletions
diff --git a/src/table/replication/fullcopy.rs b/src/table/replication/fullcopy.rs new file mode 100644 index 00000000..a62a6c3c --- /dev/null +++ b/src/table/replication/fullcopy.rs @@ -0,0 +1,59 @@ +use std::sync::Arc; + +use garage_rpc::membership::System; +use garage_rpc::ring::Ring; +use garage_util::data::*; + +use crate::replication::*; + +#[derive(Clone)] +pub struct TableFullReplication { + pub max_faults: usize, +} + +#[derive(Clone)] +struct Neighbors { + ring: Arc<Ring>, + neighbors: Vec<UUID>, +} + +impl TableFullReplication { + pub fn new(max_faults: usize) -> Self { + TableFullReplication { max_faults } + } +} + +impl TableReplication for TableFullReplication { + // Full replication schema: all nodes store everything + // Writes are disseminated in an epidemic manner in the network + + // Advantage: do all reads locally, extremely fast + // Inconvenient: only suitable to reasonably small tables + + fn read_nodes(&self, _hash: &Hash, system: &System) -> Vec<UUID> { + vec![system.id] + } + fn read_quorum(&self) -> usize { + 1 + } + + fn write_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID> { + self.replication_nodes(hash, system.ring.borrow().as_ref()) + } + fn write_quorum(&self, system: &System) -> usize { + system.ring.borrow().config.members.len() - self.max_faults + } + fn max_write_errors(&self) -> usize { + self.max_faults + } + + fn replication_nodes(&self, _hash: &Hash, ring: &Ring) -> Vec<UUID> { + ring.config.members.keys().cloned().collect::<Vec<_>>() + } + fn split_points(&self, _ring: &Ring) -> Vec<Hash> { + let mut ret = vec![]; + ret.push([0u8; 32].into()); + ret.push([0xFFu8; 32].into()); + ret + } +} diff --git a/src/table/replication/mod.rs b/src/table/replication/mod.rs new file mode 100644 index 00000000..d43d7f19 --- /dev/null +++ b/src/table/replication/mod.rs @@ -0,0 +1,6 @@ +mod parameters; + +pub mod fullcopy; +pub mod sharded; + +pub use parameters::*; diff --git a/src/table/replication/parameters.rs b/src/table/replication/parameters.rs new file mode 100644 index 00000000..4607b050 --- /dev/null +++ b/src/table/replication/parameters.rs @@ -0,0 +1,22 @@ +use garage_rpc::membership::System; +use garage_rpc::ring::Ring; + +use garage_util::data::*; + +pub trait TableReplication: Send + Sync { + // See examples in table_sharded.rs and table_fullcopy.rs + // To understand various replication methods + + // Which nodes to send reads from + fn read_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID>; + fn read_quorum(&self) -> usize; + + // Which nodes to send writes to + fn write_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID>; + fn write_quorum(&self, system: &System) -> usize; + fn max_write_errors(&self) -> usize; + + // Which are the nodes that do actually replicate the data + fn replication_nodes(&self, hash: &Hash, ring: &Ring) -> Vec<UUID>; + fn split_points(&self, ring: &Ring) -> Vec<Hash>; +} diff --git a/src/table/replication/sharded.rs b/src/table/replication/sharded.rs new file mode 100644 index 00000000..42a742cd --- /dev/null +++ b/src/table/replication/sharded.rs @@ -0,0 +1,54 @@ +use garage_rpc::membership::System; +use garage_rpc::ring::Ring; +use garage_util::data::*; + +use crate::replication::*; + +#[derive(Clone)] +pub struct TableShardedReplication { + pub replication_factor: usize, + pub read_quorum: usize, + pub write_quorum: usize, +} + +impl TableReplication for TableShardedReplication { + // Sharded replication schema: + // - based on the ring of nodes, a certain set of neighbors + // store entries, given as a function of the position of the + // entry's hash in the ring + // - reads are done on all of the nodes that replicate the data + // - writes as well + + fn read_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID> { + let ring = system.ring.borrow().clone(); + ring.walk_ring(&hash, self.replication_factor) + } + fn read_quorum(&self) -> usize { + self.read_quorum + } + + fn write_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID> { + let ring = system.ring.borrow().clone(); + ring.walk_ring(&hash, self.replication_factor) + } + fn write_quorum(&self, _system: &System) -> usize { + self.write_quorum + } + fn max_write_errors(&self) -> usize { + self.replication_factor - self.write_quorum + } + + fn replication_nodes(&self, hash: &Hash, ring: &Ring) -> Vec<UUID> { + ring.walk_ring(&hash, self.replication_factor) + } + fn split_points(&self, ring: &Ring) -> Vec<Hash> { + let mut ret = vec![]; + + ret.push([0u8; 32].into()); + for entry in ring.ring.iter() { + ret.push(entry.location); + } + ret.push([0xFFu8; 32].into()); + ret + } +} |