author     Alex Auvolat <alex@adnab.me>    2021-03-11 16:54:15 +0100
committer  Alex Auvolat <alex@adnab.me>    2021-03-11 16:54:15 +0100
commit     94f3d287742ff90f179f528421c690b00b71a912 (patch)
tree       9269d537da06d609cdc42cba8d9ab3c67d0650b9 /src/table/replication
parent     8d63738cb062e816fc01c6aa2b32936ad31ff65b (diff)
WIP big refactoring
Diffstat (limited to 'src/table/replication')
-rw-r--r--  src/table/replication/fullcopy.rs    59
-rw-r--r--  src/table/replication/mod.rs          6
-rw-r--r--  src/table/replication/parameters.rs  22
-rw-r--r--  src/table/replication/sharded.rs     54
4 files changed, 141 insertions(+), 0 deletions(-)
diff --git a/src/table/replication/fullcopy.rs b/src/table/replication/fullcopy.rs
new file mode 100644
index 00000000..a62a6c3c
--- /dev/null
+++ b/src/table/replication/fullcopy.rs
@@ -0,0 +1,59 @@
+use std::sync::Arc;
+
+use garage_rpc::membership::System;
+use garage_rpc::ring::Ring;
+use garage_util::data::*;
+
+use crate::replication::*;
+
+#[derive(Clone)]
+pub struct TableFullReplication {
+ pub max_faults: usize,
+}
+
+#[derive(Clone)]
+struct Neighbors {
+ ring: Arc<Ring>,
+ neighbors: Vec<UUID>,
+}
+
+impl TableFullReplication {
+ pub fn new(max_faults: usize) -> Self {
+ TableFullReplication { max_faults }
+ }
+}
+
+impl TableReplication for TableFullReplication {
+ // Full replication schema: all nodes store everything
+ // Writes are disseminated in an epidemic manner in the network
+
+ // Advantage: all reads can be done locally, which is extremely fast
+ // Drawback: only suitable for reasonably small tables
+
+ fn read_nodes(&self, _hash: &Hash, system: &System) -> Vec<UUID> {
+ vec![system.id]
+ }
+ fn read_quorum(&self) -> usize {
+ 1
+ }
+
+ fn write_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID> {
+ self.replication_nodes(hash, system.ring.borrow().as_ref())
+ }
+ fn write_quorum(&self, system: &System) -> usize {
+ system.ring.borrow().config.members.len() - self.max_faults
+ }
+ fn max_write_errors(&self) -> usize {
+ self.max_faults
+ }
+
+ fn replication_nodes(&self, _hash: &Hash, ring: &Ring) -> Vec<UUID> {
+ ring.config.members.keys().cloned().collect::<Vec<_>>()
+ }
+ fn split_points(&self, _ring: &Ring) -> Vec<Hash> {
+ let mut ret = vec![];
+ ret.push([0u8; 32].into());
+ ret.push([0xFFu8; 32].into());
+ ret
+ }
+}
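
For intuition on the quorum sizes used above, here is a minimal, self-contained sketch (toy numbers only, none of the garage_rpc types) of how TableFullReplication sizes its quorums: reads are always answered locally, while a write must be acknowledged by every cluster member except at most max_faults of them.

// Toy illustration of TableFullReplication's quorum arithmetic.
// `members` stands in for ring.config.members.len(), which the real
// write_quorum() reads from the System's Ring.
fn full_copy_quorums(members: usize, max_faults: usize) -> (usize, usize, usize) {
    let read_quorum = 1;                     // reads are served by the local node only
    let write_quorum = members - max_faults; // all but max_faults nodes must acknowledge
    let max_write_errors = max_faults;
    (read_quorum, write_quorum, max_write_errors)
}

fn main() {
    // e.g. a 4-node cluster that tolerates 1 unreachable node per write
    let (r, w, e) = full_copy_quorums(4, 1);
    assert_eq!((r, w, e), (1, 3, 1));
    println!("read_quorum={}, write_quorum={}, max_write_errors={}", r, w, e);
}

As in write_quorum() above, the subtraction assumes max_faults is no larger than the number of members.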
diff --git a/src/table/replication/mod.rs b/src/table/replication/mod.rs
new file mode 100644
index 00000000..d43d7f19
--- /dev/null
+++ b/src/table/replication/mod.rs
@@ -0,0 +1,6 @@
+mod parameters;
+
+pub mod fullcopy;
+pub mod sharded;
+
+pub use parameters::*;
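
mod.rs re-exports the trait from parameters.rs at the module root while keeping the two strategies in named submodules, so a caller elsewhere in this crate would import them roughly as follows (a sketch inferred from the paths in this patch):

// Hypothetical imports for other code in the crate, based on the layout above.
use crate::replication::TableReplication;                  // trait, via `pub use parameters::*`
use crate::replication::fullcopy::TableFullReplication;    // full-copy strategy
use crate::replication::sharded::TableShardedReplication;  // sharded strategy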
diff --git a/src/table/replication/parameters.rs b/src/table/replication/parameters.rs
new file mode 100644
index 00000000..4607b050
--- /dev/null
+++ b/src/table/replication/parameters.rs
@@ -0,0 +1,22 @@
+use garage_rpc::membership::System;
+use garage_rpc::ring::Ring;
+
+use garage_util::data::*;
+
+pub trait TableReplication: Send + Sync {
+ // See sharded.rs and fullcopy.rs for example implementations
+ // of the various replication methods
+
+ // Which nodes to send read requests to
+ fn read_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID>;
+ fn read_quorum(&self) -> usize;
+
+ // Which nodes to send writes to
+ fn write_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID>;
+ fn write_quorum(&self, system: &System) -> usize;
+ fn max_write_errors(&self) -> usize;
+
+ // Which nodes actually replicate the data
+ fn replication_nodes(&self, hash: &Hash, ring: &Ring) -> Vec<UUID>;
+ fn split_points(&self, ring: &Ring) -> Vec<Hash>;
+}
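
The trait splits replication into a read side and a write side; to make the write-side contract concrete, here is a small self-contained sketch (plain counters, not Garage's actual RPC machinery) of how write_quorum and max_write_errors bound the outcome of a fan-out write:

// Illustration of how a caller could combine the write-side numbers from
// TableReplication: a write is fanned out to write_nodes(hash), succeeds once
// write_quorum() nodes have acknowledged, and is abandoned once too many of
// the targets have failed for the quorum to still be reachable.
#[derive(Debug, PartialEq)]
enum WriteOutcome {
    Success,
    Failure,
    Pending,
}

fn write_outcome(targets: usize, quorum: usize, max_errors: usize, acks: usize, errors: usize) -> WriteOutcome {
    if acks >= quorum {
        WriteOutcome::Success
    } else if errors > max_errors || targets - errors < quorum {
        // too many replicas failed: the quorum can no longer be reached
        WriteOutcome::Failure
    } else {
        WriteOutcome::Pending
    }
}

fn main() {
    // Sharded example: 3 replicas, write quorum 2, so at most 1 error is tolerated.
    assert_eq!(write_outcome(3, 2, 1, 2, 0), WriteOutcome::Success);
    assert_eq!(write_outcome(3, 2, 1, 1, 2), WriteOutcome::Failure);
    assert_eq!(write_outcome(3, 2, 1, 1, 1), WriteOutcome::Pending);
}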
diff --git a/src/table/replication/sharded.rs b/src/table/replication/sharded.rs
new file mode 100644
index 00000000..42a742cd
--- /dev/null
+++ b/src/table/replication/sharded.rs
@@ -0,0 +1,54 @@
+use garage_rpc::membership::System;
+use garage_rpc::ring::Ring;
+use garage_util::data::*;
+
+use crate::replication::*;
+
+#[derive(Clone)]
+pub struct TableShardedReplication {
+ pub replication_factor: usize,
+ pub read_quorum: usize,
+ pub write_quorum: usize,
+}
+
+impl TableReplication for TableShardedReplication {
+ // Sharded replication schema:
+ // - based on the ring of nodes, a certain set of neighbors
+ //   stores each entry, determined by the position of the
+ //   entry's hash on the ring
+ // - reads are done on all of the nodes that replicate the data
+ // - writes as well
+
+ fn read_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID> {
+ let ring = system.ring.borrow().clone();
+ ring.walk_ring(&hash, self.replication_factor)
+ }
+ fn read_quorum(&self) -> usize {
+ self.read_quorum
+ }
+
+ fn write_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID> {
+ let ring = system.ring.borrow().clone();
+ ring.walk_ring(&hash, self.replication_factor)
+ }
+ fn write_quorum(&self, _system: &System) -> usize {
+ self.write_quorum
+ }
+ fn max_write_errors(&self) -> usize {
+ self.replication_factor - self.write_quorum
+ }
+
+ fn replication_nodes(&self, hash: &Hash, ring: &Ring) -> Vec<UUID> {
+ ring.walk_ring(&hash, self.replication_factor)
+ }
+ fn split_points(&self, ring: &Ring) -> Vec<Hash> {
+ let mut ret = vec![];
+
+ ret.push([0u8; 32].into());
+ for entry in ring.ring.iter() {
+ ret.push(entry.location);
+ }
+ ret.push([0xFFu8; 32].into());
+ ret
+ }
+}
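
With the sharded scheme, the usual safety condition for quorum systems is that any read quorum intersects any write quorum, i.e. read_quorum + write_quorum > replication_factor. This patch does not enforce that invariant itself; the toy sketch below just checks it for a typical 3/2/2 configuration and mirrors the max_write_errors computation from sharded.rs.

// Toy check of the quorum-overlap property for a configuration shaped like
// TableShardedReplication { replication_factor, read_quorum, write_quorum }.
struct ShardedConfig {
    replication_factor: usize,
    read_quorum: usize,
    write_quorum: usize,
}

impl ShardedConfig {
    // Any read quorum and any write quorum share at least one node exactly
    // when the two quorum sizes together exceed the number of replicas.
    fn quorums_overlap(&self) -> bool {
        self.read_quorum + self.write_quorum > self.replication_factor
    }

    // Mirrors sharded.rs: how many write errors can be tolerated.
    fn max_write_errors(&self) -> usize {
        self.replication_factor - self.write_quorum
    }
}

fn main() {
    let typical = ShardedConfig { replication_factor: 3, read_quorum: 2, write_quorum: 2 };
    assert!(typical.quorums_overlap());
    assert_eq!(typical.max_write_errors(), 1);

    // A 3/1/1 configuration would be faster, but reads could miss recent writes.
    let weak = ShardedConfig { replication_factor: 3, read_quorum: 1, write_quorum: 1 };
    assert!(!weak.quorums_overlap());
}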