diff options
-rw-r--r-- | src/model/block.rs | 2 | ||||
-rw-r--r-- | src/model/bucket_table.rs | 4 | ||||
-rw-r--r-- | src/model/garage.rs | 4 | ||||
-rw-r--r-- | src/model/key_table.rs | 4 | ||||
-rw-r--r-- | src/model/object_table.rs | 2 | ||||
-rw-r--r-- | src/model/version_table.rs | 2 | ||||
-rw-r--r-- | src/table/crdt/lww.rs | 2 | ||||
-rw-r--r-- | src/table/crdt/map.rs | 1 | ||||
-rw-r--r-- | src/table/gc.rs | 2 | ||||
-rw-r--r-- | src/table/lib.rs | 7 | ||||
-rw-r--r-- | src/table/replication/fullcopy.rs | 12 | ||||
-rw-r--r-- | src/table/replication/mod.rs | 6 | ||||
-rw-r--r-- | src/table/replication/parameters.rs | 10 | ||||
-rw-r--r-- | src/table/replication/sharded.rs | 17 | ||||
-rw-r--r-- | src/table/schema.rs | 19 | ||||
-rw-r--r-- | src/web/lib.rs | 3 |
16 files changed, 65 insertions, 32 deletions
diff --git a/src/model/block.rs b/src/model/block.rs index 0d9af38f..2b145615 100644 --- a/src/model/block.rs +++ b/src/model/block.rs @@ -18,7 +18,7 @@ use garage_rpc::membership::System; use garage_rpc::rpc_client::*; use garage_rpc::rpc_server::*; -use garage_table::replication::{sharded::TableShardedReplication, TableReplication}; +use garage_table::replication::{TableReplication, TableShardedReplication}; use crate::block_ref_table::*; diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index 6330dced..2ede4904 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -100,6 +100,10 @@ impl TableSchema for BucketTable { type E = Bucket; type Filter = DeletedFilter; + fn updated(&self, _old: Option<Self::E>, _new: Option<Self::E>) { + // nothing to do when updated + } + fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool { filter.apply(entry.is_deleted()) } diff --git a/src/model/garage.rs b/src/model/garage.rs index 5f7a67c9..3f51f9fe 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -7,8 +7,8 @@ use garage_rpc::membership::System; use garage_rpc::rpc_client::RpcHttpClient; use garage_rpc::rpc_server::RpcServer; -use garage_table::replication::fullcopy::*; -use garage_table::replication::sharded::*; +use garage_table::replication::TableFullReplication; +use garage_table::replication::TableShardedReplication; use garage_table::*; use crate::block::*; diff --git a/src/model/key_table.rs b/src/model/key_table.rs index fcca3835..e6ebe8de 100644 --- a/src/model/key_table.rs +++ b/src/model/key_table.rs @@ -113,6 +113,10 @@ impl TableSchema for KeyTable { type E = Key; type Filter = KeyFilter; + fn updated(&self, _old: Option<Self::E>, _new: Option<Self::E>) { + // nothing to do when updated + } + fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool { match filter { KeyFilter::Deleted(df) => df.apply(entry.deleted.get()), diff --git a/src/model/object_table.rs b/src/model/object_table.rs index 
34ac798a..d5be62e5 100644 --- a/src/model/object_table.rs +++ b/src/model/object_table.rs @@ -6,7 +6,7 @@ use garage_util::background::BackgroundRunner; use garage_util::data::*; use garage_table::crdt::*; -use garage_table::replication::sharded::*; +use garage_table::replication::TableShardedReplication; use garage_table::*; use crate::version_table::*; diff --git a/src/model/version_table.rs b/src/model/version_table.rs index 841fbfea..fabd1fb1 100644 --- a/src/model/version_table.rs +++ b/src/model/version_table.rs @@ -5,7 +5,7 @@ use garage_util::background::BackgroundRunner; use garage_util::data::*; use garage_table::crdt::*; -use garage_table::replication::sharded::*; +use garage_table::replication::TableShardedReplication; use garage_table::*; use crate::block_ref_table::*; diff --git a/src/table/crdt/lww.rs b/src/table/crdt/lww.rs index 25ecdb07..3b1b2406 100644 --- a/src/table/crdt/lww.rs +++ b/src/table/crdt/lww.rs @@ -34,7 +34,7 @@ use crate::crdt::crdt::*; /// and may differ from what you observed with your atomic clock! /// /// This scheme is used by AWS S3 or Soundcloud and often without knowing -/// in entreprise when reconciliating databases with ad-hoc scripts. +/// in enterprise when reconciliating databases with ad-hoc scripts. 
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub struct LWW<T> { ts: u64, diff --git a/src/table/crdt/map.rs b/src/table/crdt/map.rs index 1193e6db..c4a30a26 100644 --- a/src/table/crdt/map.rs +++ b/src/table/crdt/map.rs @@ -37,6 +37,7 @@ where Self { vals: vec![(k, v)] } } + /// Add a value to the map pub fn put(&mut self, k: K, v: V) { self.merge(&Self::put_mutator(k, v)); } diff --git a/src/table/gc.rs b/src/table/gc.rs index e52bf599..694a3789 100644 --- a/src/table/gc.rs +++ b/src/table/gc.rs @@ -74,7 +74,7 @@ where while !*must_exit.borrow() { match self.gc_loop_iter().await { Ok(true) => { - // Stuff was done, loop imediately + // Stuff was done, loop immediately continue; } Ok(false) => { diff --git a/src/table/lib.rs b/src/table/lib.rs index 3b73163b..8dcb115d 100644 --- a/src/table/lib.rs +++ b/src/table/lib.rs @@ -1,3 +1,4 @@ +#![warn(missing_docs)] #![recursion_limit = "1024"] #[macro_use] @@ -8,10 +9,10 @@ pub mod schema; pub mod util; pub mod data; -pub mod gc; -pub mod merkle; +mod gc; +mod merkle; pub mod replication; -pub mod sync; +mod sync; pub mod table; pub use schema::*; diff --git a/src/table/replication/fullcopy.rs b/src/table/replication/fullcopy.rs index bd658f63..a6b4c98c 100644 --- a/src/table/replication/fullcopy.rs +++ b/src/table/replication/fullcopy.rs @@ -6,19 +6,19 @@ use garage_util::data::*; use crate::replication::*; +/// Full replication schema: all nodes store everything +/// Writes are disseminated in an epidemic manner in the network +/// Advantage: do all reads locally, extremely fast +/// Inconvenient: only suitable to reasonably small tables #[derive(Clone)] pub struct TableFullReplication { + /// The membership manager of this node pub system: Arc<System>, + /// Max number of faults allowed while replicating a record pub max_faults: usize, } impl TableReplication for TableFullReplication { - // Full replication schema: all nodes store everything - // Writes are disseminated in an epidemic manner in the 
network - - // Advantage: do all reads locally, extremely fast - // Inconvenient: only suitable to reasonably small tables - fn read_nodes(&self, _hash: &Hash) -> Vec<UUID> { vec![self.system.id] } diff --git a/src/table/replication/mod.rs b/src/table/replication/mod.rs index d43d7f19..dfcb026a 100644 --- a/src/table/replication/mod.rs +++ b/src/table/replication/mod.rs @@ -1,6 +1,8 @@ mod parameters; -pub mod fullcopy; -pub mod sharded; +mod fullcopy; +mod sharded; +pub use fullcopy::TableFullReplication; pub use parameters::*; +pub use sharded::TableShardedReplication; diff --git a/src/table/replication/parameters.rs b/src/table/replication/parameters.rs index e46bd172..0ab9ee5a 100644 --- a/src/table/replication/parameters.rs +++ b/src/table/replication/parameters.rs @@ -2,20 +2,26 @@ use garage_rpc::ring::*; use garage_util::data::*; +/// Trait to describe how a table shall be replicated pub trait TableReplication: Send + Sync { // See examples in table_sharded.rs and table_fullcopy.rs // To understand various replication methods - // Which nodes to send reads from + /// Which nodes to send read requests to fn read_nodes(&self, hash: &Hash) -> Vec<UUID>; + /// Responses needed to consider a read successful fn read_quorum(&self) -> usize; - // Which nodes to send writes to + /// Which nodes to send writes to fn write_nodes(&self, hash: &Hash) -> Vec<UUID>; + /// Responses needed to consider a write successful fn write_quorum(&self) -> usize; + // this feels like it's write_nodes().len() - write_quorum() fn max_write_errors(&self) -> usize; // Accessing partitions, for Merkle tree & sync + /// Get partition for data with given hash fn partition_of(&self, hash: &Hash) -> Partition; + /// List of existing partitions fn partitions(&self) -> Vec<(Partition, Hash)>; } diff --git a/src/table/replication/sharded.rs b/src/table/replication/sharded.rs index dce74b03..f2d89729 100644 --- a/src/table/replication/sharded.rs +++ b/src/table/replication/sharded.rs @@ -6,22 
+6,25 @@ use garage_util::data::*; use crate::replication::*; +/// Sharded replication schema: +/// - based on the ring of nodes, a certain set of neighbors +/// store entries, given as a function of the position of the +/// entry's hash in the ring +/// - reads are done on all of the nodes that replicate the data +/// - writes as well #[derive(Clone)] pub struct TableShardedReplication { + /// The membership manager of this node pub system: Arc<System>, + /// How many times each piece of data should be replicated pub replication_factor: usize, + /// How many nodes to contact for a read, should be at most `replication_factor` pub read_quorum: usize, + /// How many nodes to contact for a write, should be at most `replication_factor` pub write_quorum: usize, } impl TableReplication for TableShardedReplication { - // Sharded replication schema: - // - based on the ring of nodes, a certain set of neighbors - // store entries, given as a function of the position of the - // entry's hash in the ring - // - reads are done on all of the nodes that replicate the data - // - writes as well - fn read_nodes(&self, hash: &Hash) -> Vec<UUID> { let ring = self.system.ring.borrow().clone(); ring.walk_ring(&hash, self.replication_factor) diff --git a/src/table/schema.rs b/src/table/schema.rs index 4d754664..f5fde95f 100644 --- a/src/table/schema.rs +++ b/src/table/schema.rs @@ -4,7 +4,9 @@ use garage_util::data::*; use crate::crdt::CRDT; +/// Trait for partitionable data pub trait PartitionKey { + /// Get the key used to partition fn hash(&self) -> Hash; } @@ -20,7 +22,9 @@ impl PartitionKey for Hash { } } +/// Trait for sortable data pub trait SortKey { + /// Get the key used to sort fn sort_key(&self) -> &[u8]; } @@ -36,25 +40,34 @@ impl SortKey for Hash { } } +/// Trait for an entry in a table. It must be sortable and partitionable. 
pub trait Entry<P: PartitionKey, S: SortKey>: CRDT + PartialEq + Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync { + /// Get the key used to partition fn partition_key(&self) -> &P; + /// Get the key used to sort fn sort_key(&self) -> &S; + /// Is the entry a tombstone? Default implementation always returns false fn is_tombstone(&self) -> bool { false } } +/// Trait for the schema used in a table pub trait TableSchema: Send + Sync { + /// The partition key used in that table type P: PartitionKey + Clone + PartialEq + Serialize + for<'de> Deserialize<'de> + Send + Sync; + /// The sort key used in that table type S: SortKey + Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync; + /// The type for an entry in that table type E: Entry<Self::P, Self::S>; type Filter: Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync; // Action to take if not able to decode current version: // try loading from an older version + /// Try migrating an entry from an older version fn try_migrate(_bytes: &[u8]) -> Option<Self::E> { None } @@ -63,9 +76,7 @@ pub trait TableSchema: Send + Sync { // as the update itself is an unchangeable fact that will never go back // due to CRDT logic. Typically errors in propagation of info should be logged // to stderr. - fn updated(&self, _old: Option<Self::E>, _new: Option<Self::E>) {} + fn updated(&self, old: Option<Self::E>, new: Option<Self::E>); - fn matches_filter(_entry: &Self::E, _filter: &Self::Filter) -> bool { - true - } + fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool; } diff --git a/src/web/lib.rs b/src/web/lib.rs index f28937b9..7d3b4d54 100644 --- a/src/web/lib.rs +++ b/src/web/lib.rs @@ -1,6 +1,7 @@ #[macro_use] extern crate log; -pub mod error; +mod error; +pub use error::Error; pub mod web_server; |