aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/main.rs1
-rw-r--r--src/table.rs20
-rw-r--r--src/table_fullcopy.rs100
-rw-r--r--src/table_sharded.rs2
-rw-r--r--src/table_sync.rs6
5 files changed, 122 insertions, 7 deletions
diff --git a/src/main.rs b/src/main.rs
index cc9da8e2..89953223 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,7 @@ mod error;
mod background;
mod membership;
mod table;
+mod table_fullcopy;
mod table_sharded;
mod table_sync;
diff --git a/src/table.rs b/src/table.rs
index d5357277..619c96d2 100644
--- a/src/table.rs
+++ b/src/table.rs
@@ -427,6 +427,8 @@ where
self: &Arc<Self>,
mut entries: Vec<Arc<ByteBuf>>,
) -> Result<(), Error> {
+ let mut epidemic_propagate = vec![];
+
for update_bytes in entries.drain(..) {
let update = rmp_serde::decode::from_read_ref::<_, F::E>(update_bytes.as_slice())?;
@@ -449,16 +451,28 @@ where
.map_err(Error::RMPEncode)
.map_err(sled::ConflictableTransactionError::Abort)?;
db.insert(tree_key.clone(), new_bytes)?;
- Ok((old_entry, Some(new_entry)))
+ Ok((old_entry, new_entry))
})?;
- if old_entry != new_entry {
- self.instance.updated(old_entry, new_entry).await;
+ if old_entry.as_ref() != Some(&new_entry) {
+ if self.replication.epidemic_writes() {
+ epidemic_propagate.push(new_entry.clone());
+ }
+
+ self.instance.updated(old_entry, Some(new_entry)).await;
let syncer = self.syncer.load_full().unwrap();
self.system.background.spawn(syncer.invalidate(tree_key));
}
}
+
+ if epidemic_propagate.len() > 0 {
+ let self2 = self.clone();
+ self.system
+ .background
+ .spawn(async move { self2.insert_many(&epidemic_propagate[..]).await });
+ }
+
Ok(())
}
diff --git a/src/table_fullcopy.rs b/src/table_fullcopy.rs
new file mode 100644
index 00000000..d5194d55
--- /dev/null
+++ b/src/table_fullcopy.rs
@@ -0,0 +1,100 @@
+use arc_swap::ArcSwapOption;
+use std::sync::Arc;
+
+use crate::data::*;
+use crate::membership::{Ring, System};
+use crate::table::*;
+
+#[derive(Clone)]
+pub struct TableFullReplication {
+ pub write_factor: usize,
+ pub write_quorum: usize,
+
+ neighbors: ArcSwapOption<Neighbors>,
+}
+
+#[derive(Clone)]
+struct Neighbors {
+ ring: Arc<Ring>,
+ neighbors: Vec<UUID>,
+}
+
+impl TableFullReplication {
+ pub fn new(write_factor: usize, write_quorum: usize) -> Self {
+ TableFullReplication {
+ write_factor,
+ write_quorum,
+ neighbors: ArcSwapOption::from(None),
+ }
+ }
+
+ fn get_neighbors(&self, system: &System) -> Vec<UUID> {
+ let neighbors = self.neighbors.load_full();
+ if let Some(n) = neighbors {
+ if Arc::ptr_eq(&n.ring, &system.ring.borrow()) {
+ return n.neighbors.clone();
+ }
+ }
+
+ // Recalculate neighbors
+ let ring = system.ring.borrow().clone();
+ let my_id = system.id.clone();
+
+ let mut nodes = vec![];
+ for (node, _) in ring.config.members.iter() {
+ let node_ranking = hash(&[node.as_slice(), my_id.as_slice()].concat());
+ nodes.push((node.clone(), node_ranking));
+ }
+ nodes.sort_by(|(_, rank1), (_, rank2)| rank1.cmp(rank2));
+ let mut neighbors = nodes
+ .drain(..)
+ .map(|(node, _)| node)
+ .filter(|node| *node != my_id)
+ .take(self.write_factor)
+ .collect::<Vec<_>>();
+ neighbors.push(my_id);
+ self.neighbors.swap(Some(Arc::new(Neighbors {
+ ring,
+ neighbors: neighbors.clone(),
+ })));
+ neighbors
+ }
+}
+
+impl TableReplication for TableFullReplication {
+ // Full replication schema: all nodes store everything
+ // Writes are disseminated in an epidemic manner in the network
+
+ // Advantage: do all reads locally, extremely fast
+ // Inconvenient: only suitable to reasonably small tables
+
+ fn read_nodes(&self, _hash: &Hash, system: &System) -> Vec<UUID> {
+ vec![system.id.clone()]
+ }
+ fn read_quorum(&self) -> usize {
+ 1
+ }
+
+ fn write_nodes(&self, _hash: &Hash, system: &System) -> Vec<UUID> {
+ self.get_neighbors(system)
+ }
+ fn write_quorum(&self) -> usize {
+ self.write_quorum
+ }
+ fn max_write_errors(&self) -> usize {
+ self.write_factor - self.write_quorum
+ }
+ fn epidemic_writes(&self) -> bool {
+ true
+ }
+
+ fn replication_nodes(&self, _hash: &Hash, ring: &Ring) -> Vec<UUID> {
+ ring.config.members.keys().cloned().collect::<Vec<_>>()
+ }
+ fn split_points(&self, _ring: &Ring) -> Vec<Hash> {
+ let mut ret = vec![];
+ ret.push([0u8; 32].into());
+ ret.push([0xFFu8; 32].into());
+ ret
+ }
+}
diff --git a/src/table_sharded.rs b/src/table_sharded.rs
index 485a9212..6a174d05 100644
--- a/src/table_sharded.rs
+++ b/src/table_sharded.rs
@@ -1,5 +1,5 @@
use crate::data::*;
-use crate::membership::{System, Ring};
+use crate::membership::{Ring, System};
use crate::table::*;
#[derive(Clone)]
diff --git a/src/table_sync.rs b/src/table_sync.rs
index b4555a77..550ad0f0 100644
--- a/src/table_sync.rs
+++ b/src/table_sync.rs
@@ -604,7 +604,7 @@ impl SyncTodo {
for i in 0..split_points.len() - 1 {
let begin = split_points[i].clone();
let end = split_points[i + 1].clone();
- let nodes = table.replication.write_nodes_from_ring(&begin, &ring);
+ let nodes = table.replication.replication_nodes(&begin, &ring);
let retain = nodes.contains(&my_id);
if !retain {
@@ -650,11 +650,11 @@ impl SyncTodo {
let end = all_points[i + 1].clone();
let was_ours = table
.replication
- .write_nodes_from_ring(&begin, &old_ring)
+ .replication_nodes(&begin, &old_ring)
.contains(&my_id);
let is_ours = table
.replication
- .write_nodes_from_ring(&begin, &new_ring)
+ .replication_nodes(&begin, &new_ring)
.contains(&my_id);
let was_todo = match old_todo.binary_search_by(|x| x.begin.cmp(&begin)) {