aboutsummaryrefslogtreecommitdiff
path: root/src/table/replication
diff options
context:
space:
mode:
Diffstat (limited to 'src/table/replication')
-rw-r--r--src/table/replication/fullcopy.rs51
-rw-r--r--src/table/replication/mod.rs6
-rw-r--r--src/table/replication/parameters.rs21
-rw-r--r--src/table/replication/sharded.rs50
4 files changed, 128 insertions, 0 deletions
diff --git a/src/table/replication/fullcopy.rs b/src/table/replication/fullcopy.rs
new file mode 100644
index 00000000..bd658f63
--- /dev/null
+++ b/src/table/replication/fullcopy.rs
@@ -0,0 +1,51 @@
+use std::sync::Arc;
+
+use garage_rpc::membership::System;
+use garage_rpc::ring::*;
+use garage_util::data::*;
+
+use crate::replication::*;
+
+#[derive(Clone)]
+pub struct TableFullReplication { // Replication parameters for tables that are stored in full on all nodes
+ pub system: Arc<System>, // Shared system handle; provides the node id used for reads and the ring
+ pub max_faults: usize, // Subtracted from the member count to get the write quorum; also returned by max_write_errors()
+}
+
+impl TableReplication for TableFullReplication {
+    // Full-copy replication: every node of the cluster stores the whole table.
+    // Writes are disseminated to the other nodes in an epidemic manner.
+    //
+    // Pro: every read is served locally, which is extremely fast.
+    // Con: only suitable for reasonably small tables.
+
+    fn read_nodes(&self, _hash: &Hash) -> Vec<UUID> {
+        vec![self.system.id]
+    }
+    fn read_quorum(&self) -> usize {
+        1
+    }
+
+    // Writes target every member listed in the current ring configuration.
+    fn write_nodes(&self, _hash: &Hash) -> Vec<UUID> {
+        self.system.ring.borrow().config.members.keys().cloned().collect()
+    }
+    // All members minus the tolerated faults, but never fewer than one.
+    fn write_quorum(&self) -> usize {
+        let member_count = self.system.ring.borrow().config.members.len();
+        member_count.saturating_sub(self.max_faults).max(1)
+    }
+    fn max_write_errors(&self) -> usize {
+        self.max_faults
+    }
+
+    // A single partition (number 0, starting at the all-zero hash) covers every entry.
+    fn partition_of(&self, _hash: &Hash) -> Partition {
+        0
+    }
+    fn partitions(&self) -> Vec<(Partition, Hash)> {
+        let first_hash: Hash = [0u8; 32].into();
+        vec![(0, first_hash)]
+    }
+}
diff --git a/src/table/replication/mod.rs b/src/table/replication/mod.rs
new file mode 100644
index 00000000..d43d7f19
--- /dev/null
+++ b/src/table/replication/mod.rs
@@ -0,0 +1,6 @@
+mod parameters;
+
+pub mod fullcopy;
+pub mod sharded;
+
+pub use parameters::*;
diff --git a/src/table/replication/parameters.rs b/src/table/replication/parameters.rs
new file mode 100644
index 00000000..e46bd172
--- /dev/null
+++ b/src/table/replication/parameters.rs
@@ -0,0 +1,21 @@
+use garage_rpc::ring::*;
+
+use garage_util::data::*;
+
+pub trait TableReplication: Send + Sync {
+ // Describes which nodes handle the entries of a table, and how it is partitioned.
+ // See the implementations in sharded.rs and fullcopy.rs for the available methods
+
+ // Which nodes to send reads to, and the read quorum
+ fn read_nodes(&self, hash: &Hash) -> Vec<UUID>;
+ fn read_quorum(&self) -> usize;
+
+ // Which nodes to send writes to, the write quorum and the accepted number of write errors
+ fn write_nodes(&self, hash: &Hash) -> Vec<UUID>;
+ fn write_quorum(&self) -> usize;
+ fn max_write_errors(&self) -> usize;
+
+ // Accessing partitions, for Merkle tree & sync
+ fn partition_of(&self, hash: &Hash) -> Partition;
+ fn partitions(&self) -> Vec<(Partition, Hash)>;
+}
diff --git a/src/table/replication/sharded.rs b/src/table/replication/sharded.rs
new file mode 100644
index 00000000..dce74b03
--- /dev/null
+++ b/src/table/replication/sharded.rs
@@ -0,0 +1,50 @@
+use std::sync::Arc;
+
+use garage_rpc::membership::System;
+use garage_rpc::ring::*;
+use garage_util::data::*;
+
+use crate::replication::*;
+
+#[derive(Clone)]
+pub struct TableShardedReplication { // Replication parameters for tables sharded over the ring of nodes
+ pub system: Arc<System>, // Shared system handle; provides the ring
+ pub replication_factor: usize, // Node count passed to walk_ring when selecting read/write nodes
+ pub read_quorum: usize, // Returned as-is by read_quorum()
+ pub write_quorum: usize, // Returned as-is by write_quorum(); subtracted from replication_factor in max_write_errors()
+}
+
+impl TableReplication for TableShardedReplication {
+    // Sharded replication schema:
+    // - the nodes storing an entry are found by walking the ring of nodes
+    //   from the entry's hash, asking for `replication_factor` nodes
+    // - reads are done on all of the nodes that replicate the data
+    // - writes as well
+
+    fn read_nodes(&self, hash: &Hash) -> Vec<UUID> {
+        let ring = self.system.ring.borrow().clone();
+        ring.walk_ring(hash, self.replication_factor)
+    }
+    fn read_quorum(&self) -> usize {
+        self.read_quorum
+    }
+
+    fn write_nodes(&self, hash: &Hash) -> Vec<UUID> {
+        let ring = self.system.ring.borrow();
+        ring.walk_ring(hash, self.replication_factor)
+    }
+    fn write_quorum(&self) -> usize {
+        self.write_quorum
+    }
+    fn max_write_errors(&self) -> usize {
+        // Saturate: write_quorum > replication_factor yields 0 instead of an underflow
+        self.replication_factor.saturating_sub(self.write_quorum)
+    }
+
+    fn partition_of(&self, hash: &Hash) -> Partition {
+        self.system.ring.borrow().partition_of(hash)
+    }
+    fn partitions(&self) -> Vec<(Partition, Hash)> {
+        self.system.ring.borrow().partitions()
+    }
+}