aboutsummaryrefslogtreecommitdiff
path: root/src/model
diff options
context:
space:
mode:
Diffstat (limited to 'src/model')
-rw-r--r--src/model/Cargo.toml3
-rw-r--r--src/model/garage.rs24
-rw-r--r--src/model/helper/bucket.rs12
-rw-r--r--src/model/index_counter.rs8
-rw-r--r--src/model/k2v/rpc.rs45
-rw-r--r--src/model/s3/object_table.rs174
-rw-r--r--src/model/s3/version_table.rs5
7 files changed, 225 insertions, 46 deletions
diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml
index 2e5b047d..776671d0 100644
--- a/src/model/Cargo.toml
+++ b/src/model/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "garage_model"
-version = "0.9.3"
+version = "0.10.0"
authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018"
license = "AGPL-3.0"
@@ -27,6 +27,7 @@ blake2.workspace = true
chrono.workspace = true
err-derive.workspace = true
hex.workspace = true
+http.workspace = true
base64.workspace = true
tracing.workspace = true
rand.workspace = true
diff --git a/src/model/garage.rs b/src/model/garage.rs
index 18421ca3..7ec8b22e 100644
--- a/src/model/garage.rs
+++ b/src/model/garage.rs
@@ -10,7 +10,7 @@ use garage_util::config::*;
use garage_util::error::*;
use garage_util::persister::PersisterShared;
-use garage_rpc::replication_mode::ReplicationMode;
+use garage_rpc::replication_mode::*;
use garage_rpc::system::System;
use garage_block::manager::*;
@@ -40,8 +40,8 @@ pub struct Garage {
/// The set of background variables that can be viewed/modified at runtime
pub bg_vars: vars::BgVars,
- /// The replication mode of this cluster
- pub replication_mode: ReplicationMode,
+ /// The replication factor of this cluster
+ pub replication_factor: ReplicationFactor,
/// The local database
pub db: db::Db,
@@ -148,32 +148,30 @@ impl Garage {
.and_then(|x| NetworkKey::from_slice(&x))
.ok_or_message("Invalid RPC secret key")?;
- let replication_mode = ReplicationMode::parse(&config.replication_mode)
- .ok_or_message("Invalid replication_mode in config file.")?;
+ let (replication_factor, consistency_mode) = parse_replication_mode(&config)?;
info!("Initialize background variable system...");
let mut bg_vars = vars::BgVars::new();
info!("Initialize membership management system...");
- let system = System::new(network_key, replication_mode, &config)?;
+ let system = System::new(network_key, replication_factor, consistency_mode, &config)?;
let data_rep_param = TableShardedReplication {
system: system.clone(),
- replication_factor: replication_mode.replication_factor(),
- write_quorum: replication_mode.write_quorum(),
+ replication_factor: replication_factor.into(),
+ write_quorum: replication_factor.write_quorum(consistency_mode),
read_quorum: 1,
};
let meta_rep_param = TableShardedReplication {
system: system.clone(),
- replication_factor: replication_mode.replication_factor(),
- write_quorum: replication_mode.write_quorum(),
- read_quorum: replication_mode.read_quorum(),
+ replication_factor: replication_factor.into(),
+ write_quorum: replication_factor.write_quorum(consistency_mode),
+ read_quorum: replication_factor.read_quorum(consistency_mode),
};
let control_rep_param = TableFullReplication {
system: system.clone(),
- max_faults: replication_mode.control_write_max_faults(),
};
info!("Initialize block manager...");
@@ -265,7 +263,7 @@ impl Garage {
Ok(Arc::new(Self {
config,
bg_vars,
- replication_mode,
+ replication_factor,
db,
system,
block_manager,
diff --git a/src/model/helper/bucket.rs b/src/model/helper/bucket.rs
index f4e669c3..a712d683 100644
--- a/src/model/helper/bucket.rs
+++ b/src/model/helper/bucket.rs
@@ -112,10 +112,12 @@ impl<'a> BucketHelper<'a> {
#[cfg(feature = "k2v")]
{
- use garage_rpc::ring::Ring;
- use std::sync::Arc;
-
- let ring: Arc<Ring> = self.0.system.ring.borrow().clone();
+ let node_id_vec = self
+ .0
+ .system
+ .cluster_layout()
+ .all_nongateway_nodes()
+ .to_vec();
let k2vindexes = self
.0
.k2v
@@ -124,7 +126,7 @@ impl<'a> BucketHelper<'a> {
.get_range(
&bucket_id,
None,
- Some((DeletedFilter::NotDeleted, ring.layout.node_id_vec.clone())),
+ Some((DeletedFilter::NotDeleted, node_id_vec)),
10,
EnumerationOrder::Forward,
)
diff --git a/src/model/index_counter.rs b/src/model/index_counter.rs
index c0bf38d8..aa13ee7b 100644
--- a/src/model/index_counter.rs
+++ b/src/model/index_counter.rs
@@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
use garage_db as db;
-use garage_rpc::ring::Ring;
+use garage_rpc::layout::LayoutHelper;
use garage_rpc::system::System;
use garage_util::background::BackgroundRunner;
use garage_util::data::*;
@@ -83,9 +83,9 @@ impl<T: CountedItem> Entry<T::CP, T::CS> for CounterEntry<T> {
}
impl<T: CountedItem> CounterEntry<T> {
- pub fn filtered_values(&self, ring: &Ring) -> HashMap<String, i64> {
- let nodes = &ring.layout.node_id_vec[..];
- self.filtered_values_with_nodes(nodes)
+ pub fn filtered_values(&self, layout: &LayoutHelper) -> HashMap<String, i64> {
+ let nodes = layout.all_nongateway_nodes();
+ self.filtered_values_with_nodes(&nodes)
}
pub fn filtered_values_with_nodes(&self, nodes: &[Uuid]) -> HashMap<String, i64> {
diff --git a/src/model/k2v/rpc.rs b/src/model/k2v/rpc.rs
index 4ab44c22..e15f2df8 100644
--- a/src/model/k2v/rpc.rs
+++ b/src/model/k2v/rpc.rs
@@ -127,23 +127,21 @@ impl K2VRpcHandler {
.item_table
.data
.replication
- .write_nodes(&partition.hash());
+ .storage_nodes(&partition.hash());
who.sort();
self.system
- .rpc
+ .rpc_helper()
.try_call_many(
&self.endpoint,
- &who[..],
+ &who,
K2VRpc::InsertItem(InsertedItem {
partition,
sort_key,
causal_context,
value,
}),
- RequestStrategy::with_priority(PRIO_NORMAL)
- .with_quorum(1)
- .interrupt_after_quorum(true),
+ RequestStrategy::with_priority(PRIO_NORMAL).with_quorum(1),
)
.await?;
@@ -168,7 +166,7 @@ impl K2VRpcHandler {
.item_table
.data
.replication
- .write_nodes(&partition.hash());
+ .storage_nodes(&partition.hash());
who.sort();
call_list.entry(who).or_default().push(InsertedItem {
@@ -187,14 +185,12 @@ impl K2VRpcHandler {
let call_futures = call_list.into_iter().map(|(nodes, items)| async move {
let resp = self
.system
- .rpc
+ .rpc_helper()
.try_call_many(
&self.endpoint,
&nodes[..],
K2VRpc::InsertManyItems(items),
- RequestStrategy::with_priority(PRIO_NORMAL)
- .with_quorum(1)
- .interrupt_after_quorum(true),
+ RequestStrategy::with_priority(PRIO_NORMAL).with_quorum(1),
)
.await?;
Ok::<_, Error>((nodes, resp))
@@ -223,15 +219,16 @@ impl K2VRpcHandler {
},
sort_key,
};
+ // TODO figure this out with write sets, is it still appropriate???
let nodes = self
.item_table
.data
.replication
- .write_nodes(&poll_key.partition.hash());
+ .read_nodes(&poll_key.partition.hash());
- let rpc = self.system.rpc.try_call_many(
+ let rpc = self.system.rpc_helper().try_call_many(
&self.endpoint,
- &nodes[..],
+ &nodes,
K2VRpc::PollItem {
key: poll_key,
causal_context,
@@ -239,9 +236,11 @@ impl K2VRpcHandler {
},
RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.item_table.data.replication.read_quorum())
+ .send_all_at_once(true)
.without_timeout(),
);
- let timeout_duration = Duration::from_millis(timeout_msec) + self.system.rpc.rpc_timeout();
+ let timeout_duration =
+ Duration::from_millis(timeout_msec) + self.system.rpc_helper().rpc_timeout();
let resps = select! {
r = rpc => r?,
_ = tokio::time::sleep(timeout_duration) => return Ok(None),
@@ -283,11 +282,12 @@ impl K2VRpcHandler {
seen.restrict(&range);
// Prepare PollRange RPC to send to the storage nodes responsible for the parititon
+ // TODO figure this out with write sets, does it still work????
let nodes = self
.item_table
.data
.replication
- .write_nodes(&range.partition.hash());
+ .read_nodes(&range.partition.hash());
let quorum = self.item_table.data.replication.read_quorum();
let msg = K2VRpc::PollRange {
range,
@@ -300,7 +300,11 @@ impl K2VRpcHandler {
let rs = RequestStrategy::with_priority(PRIO_NORMAL).without_timeout();
let mut requests = nodes
.iter()
- .map(|node| self.system.rpc.call(&self.endpoint, *node, msg.clone(), rs))
+ .map(|node| {
+ self.system
+ .rpc_helper()
+ .call(&self.endpoint, *node, msg.clone(), rs)
+ })
.collect::<FuturesUnordered<_>>();
// Fetch responses. This procedure stops fetching responses when any of the following
@@ -316,8 +320,9 @@ impl K2VRpcHandler {
// kind: all items produced by that node until time ts have been returned, so we can
// bump the entry in the global vector clock and possibly remove some item-specific
// vector clocks)
- let mut deadline =
- Instant::now() + Duration::from_millis(timeout_msec) + self.system.rpc.rpc_timeout();
+ let mut deadline = Instant::now()
+ + Duration::from_millis(timeout_msec)
+ + self.system.rpc_helper().rpc_timeout();
let mut resps = vec![];
let mut errors = vec![];
loop {
@@ -339,7 +344,7 @@ impl K2VRpcHandler {
}
if errors.len() > nodes.len() - quorum {
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
- return Err(Error::Quorum(quorum, resps.len(), nodes.len(), errors).into());
+ return Err(Error::Quorum(quorum, None, resps.len(), nodes.len(), errors).into());
}
// Take all returned items into account to produce the response.
diff --git a/src/model/s3/object_table.rs b/src/model/s3/object_table.rs
index ebea04bd..f2d21493 100644
--- a/src/model/s3/object_table.rs
+++ b/src/model/s3/object_table.rs
@@ -210,7 +210,179 @@ mod v09 {
}
}
-pub use v09::*;
+mod v010 {
+ use garage_util::data::{Hash, Uuid};
+ use serde::{Deserialize, Serialize};
+
+ use super::v09;
+
+ /// An object
+ #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
+ pub struct Object {
+ /// The bucket in which the object is stored, used as partition key
+ pub bucket_id: Uuid,
+
+ /// The key at which the object is stored in its bucket, used as sorting key
+ pub key: String,
+
+ /// The list of currenty stored versions of the object
+ pub(super) versions: Vec<ObjectVersion>,
+ }
+
+ /// Informations about a version of an object
+ #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
+ pub struct ObjectVersion {
+ /// Id of the version
+ pub uuid: Uuid,
+ /// Timestamp of when the object was created
+ pub timestamp: u64,
+ /// State of the version
+ pub state: ObjectVersionState,
+ }
+
+ /// State of an object version
+ #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
+ pub enum ObjectVersionState {
+ /// The version is being received
+ Uploading {
+ /// Indicates whether this is a multipart upload
+ multipart: bool,
+ /// Encryption params + headers to be included in the final object
+ encryption: ObjectVersionEncryption,
+ },
+ /// The version is fully received
+ Complete(ObjectVersionData),
+ /// The version uploaded containded errors or the upload was explicitly aborted
+ Aborted,
+ }
+
+ /// Data stored in object version
+ #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
+ pub enum ObjectVersionData {
+ /// The object was deleted, this Version is a tombstone to mark it as such
+ DeleteMarker,
+ /// The object is short, it's stored inlined.
+ /// It is never compressed. For encrypted objects, it is encrypted using
+ /// AES256-GCM, like the encrypted headers.
+ Inline(ObjectVersionMeta, #[serde(with = "serde_bytes")] Vec<u8>),
+ /// The object is not short, Hash of first block is stored here, next segments hashes are
+ /// stored in the version table
+ FirstBlock(ObjectVersionMeta, Hash),
+ }
+
+ /// Metadata about the object version
+ #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
+ pub struct ObjectVersionMeta {
+ /// Size of the object. If object is encrypted/compressed,
+ /// this is always the size of the unencrypted/uncompressed data
+ pub size: u64,
+ /// etag of the object
+ pub etag: String,
+ /// Encryption params + headers (encrypted or plaintext)
+ pub encryption: ObjectVersionEncryption,
+ }
+
+ /// Encryption information + metadata
+ #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
+ pub enum ObjectVersionEncryption {
+ SseC {
+ /// Encrypted serialized ObjectVersionHeaders struct.
+ /// This is never compressed, just encrypted using AES256-GCM.
+ #[serde(with = "serde_bytes")]
+ headers: Vec<u8>,
+ /// Whether data blocks are compressed in addition to being encrypted
+ /// (compression happens before encryption, whereas for non-encrypted
+ /// objects, compression is handled at the level of the block manager)
+ compressed: bool,
+ },
+ Plaintext {
+ /// Plain-text headers
+ headers: ObjectVersionHeaders,
+ },
+ }
+
+ /// Vector of headers, as tuples of the format (header name, header value)
+ #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
+ pub struct ObjectVersionHeaders(pub Vec<(String, String)>);
+
+ impl garage_util::migrate::Migrate for Object {
+ const VERSION_MARKER: &'static [u8] = b"G010s3ob";
+
+ type Previous = v09::Object;
+
+ fn migrate(old: v09::Object) -> Object {
+ Object {
+ bucket_id: old.bucket_id,
+ key: old.key,
+ versions: old.versions.into_iter().map(migrate_version).collect(),
+ }
+ }
+ }
+
+ fn migrate_version(old: v09::ObjectVersion) -> ObjectVersion {
+ ObjectVersion {
+ uuid: old.uuid,
+ timestamp: old.timestamp,
+ state: match old.state {
+ v09::ObjectVersionState::Uploading { multipart, headers } => {
+ ObjectVersionState::Uploading {
+ multipart,
+ encryption: migrate_headers(headers),
+ }
+ }
+ v09::ObjectVersionState::Complete(d) => {
+ ObjectVersionState::Complete(migrate_data(d))
+ }
+ v09::ObjectVersionState::Aborted => ObjectVersionState::Aborted,
+ },
+ }
+ }
+
+ fn migrate_data(old: v09::ObjectVersionData) -> ObjectVersionData {
+ match old {
+ v09::ObjectVersionData::DeleteMarker => ObjectVersionData::DeleteMarker,
+ v09::ObjectVersionData::Inline(meta, data) => {
+ ObjectVersionData::Inline(migrate_meta(meta), data)
+ }
+ v09::ObjectVersionData::FirstBlock(meta, fb) => {
+ ObjectVersionData::FirstBlock(migrate_meta(meta), fb)
+ }
+ }
+ }
+
+ fn migrate_meta(old: v09::ObjectVersionMeta) -> ObjectVersionMeta {
+ ObjectVersionMeta {
+ size: old.size,
+ etag: old.etag,
+ encryption: migrate_headers(old.headers),
+ }
+ }
+
+ fn migrate_headers(old: v09::ObjectVersionHeaders) -> ObjectVersionEncryption {
+ use http::header::CONTENT_TYPE;
+
+ let mut new_headers = Vec::with_capacity(old.other.len() + 1);
+ if old.content_type != "blob" {
+ new_headers.push((CONTENT_TYPE.as_str().to_string(), old.content_type));
+ }
+ for (name, value) in old.other.into_iter() {
+ new_headers.push((name, value));
+ }
+
+ ObjectVersionEncryption::Plaintext {
+ headers: ObjectVersionHeaders(new_headers),
+ }
+ }
+
+ // Since ObjectVersionHeaders can now be serialized independently, for the
+ // purpose of being encrypted, we need it to support migrations on its own
+ // as well.
+ impl garage_util::migrate::InitialFormat for ObjectVersionHeaders {
+ const VERSION_MARKER: &'static [u8] = b"G010s3oh";
+ }
+}
+
+pub use v010::*;
impl Object {
/// Initialize an Object struct from parts
diff --git a/src/model/s3/version_table.rs b/src/model/s3/version_table.rs
index 5c032f9f..b4662a55 100644
--- a/src/model/s3/version_table.rs
+++ b/src/model/s3/version_table.rs
@@ -44,7 +44,8 @@ mod v05 {
pub struct VersionBlockKey {
/// Number of the part
pub part_number: u64,
- /// Offset of this sub-segment in its part
+ /// Offset of this sub-segment in its part as sent by the client
+ /// (before any kind of compression or encryption)
pub offset: u64,
}
@@ -53,7 +54,7 @@ mod v05 {
pub struct VersionBlock {
/// Blake2 sum of the block
pub hash: Hash,
- /// Size of the block
+ /// Size of the block, before any kind of compression or encryption
pub size: u64,
}