aboutsummaryrefslogtreecommitdiff
path: root/src/util/crdt/lww_map.rs
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2021-11-09 12:24:04 +0100
committerAlex Auvolat <alex@adnab.me>2021-11-16 16:05:53 +0100
commitc94406f4282d48e2e2ac82ffb57eafaad23f7edc (patch)
tree01fe1b272e18fdae993e2207d8d3aea4a301ec56 /src/util/crdt/lww_map.rs
parent53888995bdd7c672d2e3ab8bb6a3529195c127a9 (diff)
downloadgarage-c94406f4282d48e2e2ac82ffb57eafaad23f7edc.tar.gz
garage-c94406f4282d48e2e2ac82ffb57eafaad23f7edc.zip
Improve how node roles are assigned in Garagev0.5-beta1
- change the terminology: the network configuration becomes the role table, the configuration of a nodes becomes a node's role - the modification of the role table takes place in two steps: first, changes are staged in a CRDT data structure. Then, once the user is happy with the changes, they can commit them all at once (or revert them). - update documentation - fix tests - implement smarter partition assignation algorithm This patch breaks the format of the network configuration: when migrating, the cluster will be in a state where no roles are assigned. All roles must be re-assigned and commited at once. This migration should not pose an issue.
Diffstat (limited to 'src/util/crdt/lww_map.rs')
-rw-r--r--src/util/crdt/lww_map.rs167
1 files changed, 167 insertions, 0 deletions
diff --git a/src/util/crdt/lww_map.rs b/src/util/crdt/lww_map.rs
new file mode 100644
index 00000000..3e9aba79
--- /dev/null
+++ b/src/util/crdt/lww_map.rs
@@ -0,0 +1,167 @@
+use std::cmp::Ordering;
+
+use serde::{Deserialize, Serialize};
+
+use crate::time::now_msec;
+
+use crate::crdt::crdt::*;
+
+/// Last Write Win Map
+///
+/// This types defines a CRDT for a map from keys to values.
+/// The values have an associated timestamp, such that the last written value
+/// takes precedence over previous ones. As for the simpler `LWW` type, the value
+/// type `V` is also required to implement the CRDT trait.
+/// We do not encourage mutating the values associated with a given key
+/// without updating the timestamp, in fact at the moment we do not provide a `.get_mut()`
+/// method that would allow that.
+///
+/// Internally, the map is stored as a vector of keys and values, sorted by ascending key order.
+/// This is why the key type `K` must implement `Ord` (and also to ensure a unique serialization,
+/// such that two values can be compared for equality based on their hashes). As a consequence,
+/// insertions take `O(n)` time. This means that LWWMap should be used for reasonably small maps.
+/// However, note that even if we were using a more efficient data structure such as a `BTreeMap`,
+/// the serialization cost `O(n)` would still have to be paid at each modification, so we are
+/// actually not losing anything here.
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+pub struct LwwMap<K, V> {
+ vals: Vec<(K, u64, V)>,
+}
+
+impl<K, V> LwwMap<K, V>
+where
+ K: Ord,
+ V: Crdt,
+{
+ /// Create a new empty map CRDT
+ pub fn new() -> Self {
+ Self { vals: vec![] }
+ }
+ /// Used to migrate from a map defined in an incompatible format. This produces
+ /// a map that contains a single item with the specified timestamp (copied from
+ /// the incompatible format). Do this as many times as you have items to migrate,
+ /// and put them all together using the CRDT merge operator.
+ pub fn migrate_from_raw_item(k: K, ts: u64, v: V) -> Self {
+ Self {
+ vals: vec![(k, ts, v)],
+ }
+ }
+ /// Returns a map that contains a single mapping from the specified key to the specified value.
+ /// This map is a mutator, or a delta-CRDT, such that when it is merged with the original map,
+ /// the previous value will be replaced with the one specified here.
+ /// The timestamp in the provided mutator is set to the maximum of the current system's clock
+ /// and 1 + the previous value's timestamp (if there is one), so that the new value will always
+ /// take precedence (LWW rule).
+ ///
+ /// Typically, to update the value associated to a key in the map, you would do the following:
+ ///
+ /// ```ignore
+ /// let my_update = my_crdt.update_mutator(key_to_modify, new_value);
+ /// my_crdt.merge(&my_update);
+ /// ```
+ ///
+ /// However extracting the mutator on its own and only sending that on the network is very
+ /// interesting as it is much smaller than the whole map.
+ pub fn update_mutator(&self, k: K, new_v: V) -> Self {
+ let new_vals = match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
+ Ok(i) => {
+ let (_, old_ts, _) = self.vals[i];
+ let new_ts = std::cmp::max(old_ts + 1, now_msec());
+ vec![(k, new_ts, new_v)]
+ }
+ Err(_) => vec![(k, now_msec(), new_v)],
+ };
+ Self { vals: new_vals }
+ }
+ /// Takes all of the values of the map and returns them. The current map is reset to the
+ /// empty map. This is very usefull to produce in-place a new map that contains only a delta
+ /// that modifies a certain value:
+ ///
+ /// ```ignore
+ /// let mut a = get_my_crdt_value();
+ /// let old_a = a.take_and_clear();
+ /// a.merge(&old_a.update_mutator(key_to_modify, new_value));
+ /// put_my_crdt_value(a);
+ /// ```
+ ///
+ /// Of course in this simple example we could have written simply
+ /// `pyt_my_crdt_value(a.update_mutator(key_to_modify, new_value))`,
+ /// but in the case where the map is a field in a struct for instance (as is always the case),
+ /// this becomes very handy:
+ ///
+ /// ```ignore
+ /// let mut a = get_my_crdt_value();
+ /// let old_a_map = a.map_field.take_and_clear();
+ /// a.map_field.merge(&old_a_map.update_mutator(key_to_modify, new_value));
+ /// put_my_crdt_value(a);
+ /// ```
+ pub fn take_and_clear(&mut self) -> Self {
+ let vals = std::mem::take(&mut self.vals);
+ Self { vals }
+ }
+ /// Removes all values from the map
+ pub fn clear(&mut self) {
+ self.vals.clear();
+ }
+ /// Get a reference to the value assigned to a key
+ pub fn get(&self, k: &K) -> Option<&V> {
+ match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(k)) {
+ Ok(i) => Some(&self.vals[i].2),
+ Err(_) => None,
+ }
+ }
+ /// Gets a reference to all of the items, as a slice. Usefull to iterate on all map values.
+ /// In most case you will want to ignore the timestamp (second item of the tuple).
+ pub fn items(&self) -> &[(K, u64, V)] {
+ &self.vals[..]
+ }
+
+ /// Returns the number of items in the map
+ pub fn len(&self) -> usize {
+ self.vals.len()
+ }
+
+ /// Returns true if the map is empty
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+}
+
+impl<K, V> Crdt for LwwMap<K, V>
+where
+ K: Clone + Ord,
+ V: Clone + Crdt,
+{
+ fn merge(&mut self, other: &Self) {
+ for (k, ts2, v2) in other.vals.iter() {
+ match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(k)) {
+ Ok(i) => {
+ let (_, ts1, _v1) = &self.vals[i];
+ match ts2.cmp(ts1) {
+ Ordering::Greater => {
+ self.vals[i].1 = *ts2;
+ self.vals[i].2 = v2.clone();
+ }
+ Ordering::Equal => {
+ self.vals[i].2.merge(v2);
+ }
+ Ordering::Less => (),
+ }
+ }
+ Err(i) => {
+ self.vals.insert(i, (k.clone(), *ts2, v2.clone()));
+ }
+ }
+ }
+ }
+}
+
+impl<K, V> Default for LwwMap<K, V>
+where
+ K: Ord,
+ V: Crdt,
+{
+ fn default() -> Self {
+ Self::new()
+ }
+}