diff options
author | Alex Auvolat <alex@adnab.me> | 2021-03-18 19:27:02 +0100 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2021-03-18 19:27:02 +0100 |
commit | 4348bde180887f5185ca6da6024476e8e8fb2fe6 (patch) | |
tree | 8a35f0e4229d15665af7673eebcaa1b3d75a73cb /src/table/replication | |
parent | 5b659b28ce6bef15072d2fc93f777aa8ff73b2d8 (diff) | |
parent | 4eb16e886388f35d2bdee52b16922421004cf132 (diff) | |
download | garage-4348bde180887f5185ca6da6024476e8e8fb2fe6.tar.gz garage-4348bde180887f5185ca6da6024476e8e8fb2fe6.zip |
Merge branch 'dev-0.2'
Diffstat (limited to 'src/table/replication')
-rw-r--r-- | src/table/replication/fullcopy.rs | 51 | ||||
-rw-r--r-- | src/table/replication/mod.rs | 6 | ||||
-rw-r--r-- | src/table/replication/parameters.rs | 21 | ||||
-rw-r--r-- | src/table/replication/sharded.rs | 50 |
4 files changed, 128 insertions, 0 deletions
diff --git a/src/table/replication/fullcopy.rs b/src/table/replication/fullcopy.rs
new file mode 100644
index 00000000..bd658f63
--- /dev/null
+++ b/src/table/replication/fullcopy.rs
@@ -0,0 +1,57 @@
+use std::sync::Arc;
+
+use garage_rpc::membership::System;
+use garage_rpc::ring::*;
+use garage_util::data::*;
+
+use crate::replication::*;
+
+/// Full replication schema: all nodes store everything.
+///
+/// Writes are disseminated in an epidemic manner in the network.
+///
+/// - Advantage: all reads are done locally, which is extremely fast.
+/// - Drawback: only suitable for reasonably small tables.
+#[derive(Clone)]
+pub struct TableFullReplication {
+    /// Shared `System`; used here for the local node id and the ring.
+    pub system: Arc<System>,
+    /// Number of member nodes that may fail without failing a write.
+    pub max_faults: usize,
+}
+
+impl TableReplication for TableFullReplication {
+    /// Reads are served by the local node only.
+    fn read_nodes(&self, _hash: &Hash) -> Vec<UUID> {
+        vec![self.system.id]
+    }
+    /// A single answer is enough for a read.
+    fn read_quorum(&self) -> usize {
+        1
+    }
+
+    /// Writes go to every member of the current ring configuration.
+    fn write_nodes(&self, _hash: &Hash) -> Vec<UUID> {
+        let ring = self.system.ring.borrow();
+        ring.config.members.keys().cloned().collect()
+    }
+    /// All members except `max_faults` of them must acknowledge a write,
+    /// with a floor of one.
+    fn write_quorum(&self) -> usize {
+        let nmembers = self.system.ring.borrow().config.members.len();
+        nmembers.saturating_sub(self.max_faults).max(1)
+    }
+    /// Up to `max_faults` write errors are tolerated.
+    fn max_write_errors(&self) -> usize {
+        self.max_faults
+    }
+
+    /// Everything is in a single partition, numbered 0.
+    fn partition_of(&self, _hash: &Hash) -> Partition {
+        0u16
+    }
+    /// The single partition 0, paired with the all-zero hash.
+    fn partitions(&self) -> Vec<(Partition, Hash)> {
+        vec![(0u16, [0u8; 32].into())]
+    }
+}
diff --git a/src/table/replication/mod.rs b/src/table/replication/mod.rs
new file mode 100644
index 00000000..d43d7f19
--- /dev/null
+++ b/src/table/replication/mod.rs
@@ -0,0 +1,6 @@
+mod parameters;
+
+pub mod fullcopy;
+pub mod sharded;
+
+pub use parameters::*;
diff --git a/src/table/replication/parameters.rs b/src/table/replication/parameters.rs
new file mode 100644
index 00000000..e46bd172
--- /dev/null
+++ b/src/table/replication/parameters.rs
@@ -0,0 +1,29 @@
+use garage_rpc::ring::*;
+
+use garage_util::data::*;
+
+/// Describes how the entries of a table are replicated across nodes.
+///
+/// See the implementations in `sharded.rs` and `fullcopy.rs` to
+/// understand the various replication methods.
+pub trait TableReplication: Send + Sync {
+    /// Which nodes to send reads to.
+    fn read_nodes(&self, hash: &Hash) -> Vec<UUID>;
+    /// Read quorum; presumably the number of responses a read needs
+    /// (TODO confirm against callers).
+    fn read_quorum(&self) -> usize;
+
+    /// Which nodes to send writes to.
+    fn write_nodes(&self, hash: &Hash) -> Vec<UUID>;
+    /// Write quorum; presumably the number of acknowledgements a write
+    /// needs (TODO confirm against callers).
+    fn write_quorum(&self) -> usize;
+    /// Maximum number of write errors that is tolerated.
+    fn max_write_errors(&self) -> usize;
+
+    // Accessing partitions, for Merkle tree & sync
+    /// Partition that the given hash falls into.
+    fn partition_of(&self, hash: &Hash) -> Partition;
+    /// List of partitions, each paired with a hash.
+    fn partitions(&self) -> Vec<(Partition, Hash)>;
+}
diff --git a/src/table/replication/sharded.rs b/src/table/replication/sharded.rs
new file mode 100644
index 00000000..dce74b03
--- /dev/null
+++ b/src/table/replication/sharded.rs
@@ -0,0 +1,61 @@
+use std::sync::Arc;
+
+use garage_rpc::membership::System;
+use garage_rpc::ring::*;
+use garage_util::data::*;
+
+use crate::replication::*;
+
+/// Sharded replication schema:
+/// - based on the ring of nodes, a certain set of neighbors
+///   store entries, given as a function of the position of the
+///   entry's hash in the ring
+/// - reads are done on all of the nodes that replicate the data
+/// - writes as well
+#[derive(Clone)]
+pub struct TableShardedReplication {
+    /// Shared `System`; used here for access to the ring.
+    pub system: Arc<System>,
+    /// Node count passed to `walk_ring` for each hash.
+    pub replication_factor: usize,
+    /// Value returned by `read_quorum()`.
+    pub read_quorum: usize,
+    /// Value returned by `write_quorum()`.
+    pub write_quorum: usize,
+}
+
+impl TableReplication for TableShardedReplication {
+    /// Walks the ring from `hash`, asking for `replication_factor` nodes.
+    fn read_nodes(&self, hash: &Hash) -> Vec<UUID> {
+        let ring = self.system.ring.borrow();
+        ring.walk_ring(hash, self.replication_factor)
+    }
+    /// Configured read quorum.
+    fn read_quorum(&self) -> usize {
+        self.read_quorum
+    }
+
+    /// Same ring walk as for reads.
+    fn write_nodes(&self, hash: &Hash) -> Vec<UUID> {
+        let ring = self.system.ring.borrow();
+        ring.walk_ring(hash, self.replication_factor)
+    }
+    /// Configured write quorum.
+    fn write_quorum(&self) -> usize {
+        self.write_quorum
+    }
+    /// `replication_factor - write_quorum`, saturating at zero so that a
+    /// `write_quorum` larger than `replication_factor` cannot underflow.
+    fn max_write_errors(&self) -> usize {
+        self.replication_factor.saturating_sub(self.write_quorum)
+    }
+
+    /// Delegates to the ring's own partition lookup.
+    fn partition_of(&self, hash: &Hash) -> Partition {
+        self.system.ring.borrow().partition_of(hash)
+    }
+    /// Delegates to the ring's own partition list.
+    fn partitions(&self) -> Vec<(Partition, Hash)> {
+        self.system.ring.borrow().partitions()
+    }
+}